[PATCH] drm/amdgpu/gfx10: fix out-of-bound mqd_backup array access
Fixes: 4990f957c845 ("drm/amdgpu/gfx10: fix mqd backup/restore for gfx rings") Signed-off-by: Xiaojie Yuan --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a492174ef29b..52c27e49bc7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, -- 2.20.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu/gfx10: re-init clear state buffer after gpu reset
This patch fixes 2nd baco reset failure with gfxoff enabled on navi1x. clear state buffer (resides in vram) is corrupted after 1st baco reset, upon gfxoff exit, CPF gets garbage header in CSIB and hangs. Signed-off-by: Xiaojie Yuan --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 43 ++ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9274bd4b6c68..8e24ea08ca39 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1789,27 +1789,52 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + int r; + + if (adev->in_gpu_reset) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (r) + return r; + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, + (void **)&adev->gfx.rlc.cs_ptr); + if (!r) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, + adev->gfx.rlc.cs_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + } + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + if (r) + return r; + } + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; } -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) +static int gfx_v10_0_init_pg(struct amdgpu_device *adev) { int i; + int r; - gfx_v10_0_init_csb(adev); + r = gfx_v10_0_init_csb(adev); + if (r) + return r; for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1911,7 +1936,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); + + r = gfx_v10_0_init_pg(adev); + if (r) + return r; /* enable RLC SRM */ gfx_v10_0_rlc_enable_srm(adev); @@ -1937,7 +1965,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + r = gfx_v10_0_init_pg(adev); + if (r) + return r; + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { -- 2.20.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL for gfx10
BR, Aaron Liu > -Original Message- > From: Tuikov, Luben > Sent: Wednesday, November 20, 2019 7:52 AM > To: Liu, Aaron ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Huang, Ray > ; Koenig, Christian > Subject: Re: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL > for gfx10 > > On 2019-11-18 12:18 a.m., Aaron Liu wrote: > > This patch enables TMZ bit in FRAME_CONTROL for gfx10. > > > > Signed-off-by: Aaron Liu > > --- > > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index d6e11ee..8dce067 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -4588,7 +4588,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct > amdgpu_ring *ring, > > gfx_v10_0_ring_emit_ce_meta(ring, > > flags & AMDGPU_IB_PREEMPTED ? true : > false); > > > > - gfx_v10_0_ring_emit_tmz(ring, true, false); > > + gfx_v10_0_ring_emit_tmz(ring, true, trusted); > > > > Do you mean here "true" (the opposite of "false") as opposed to "trusted"? > Here need to consider both non-TMZ and TMZ. Trusted value is decided by cs.in.flags in amdgpu_cs_submit* from libdrm. > Regards, > Luben > > > dw2 |= 0x8000; /* set load_enable otherwise this package is just > NOPs */ > > if (flags & AMDGPU_HAVE_CTX_SWITCH) { > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface with tmz parameter
BR, Aaron Liu > -Original Message- > From: Tuikov, Luben > Sent: Wednesday, November 20, 2019 7:12 AM > To: Liu, Aaron ; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander ; Huang, Ray > ; Koenig, Christian > Subject: Re: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface > with tmz parameter > > I wonder if we really do need yet another function argument, thus increasing > the argument list, or if the "tmz" boolean can/is already a property of the > job/command/ib/etc., and if it can indeed be had from the latter entity...? > Hi Luben, In fact, I also thought about it. Compared to add this argument to other entities, perhaps it is more clearly and simply. Another reason is that TMZ is a relatively independent property. > Regards, > Luben > > On 2019-11-18 12:18 a.m., Aaron Liu wrote: > > This patch expands sdma copy_buffer interface with tmz parameter. > > > > Signed-off-by: Aaron Liu > > Reviewed-by: Christian König > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +++-- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- > > drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 3 ++- > > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- > > drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 ++- > > 8 files changed, 17 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > > index 761ff8b..b313465 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > > @@ -79,7 +79,8 @@ struct amdgpu_buffer_funcs { > > /* dst addr in bytes */ > > uint64_t dst_offset, > > /* number of byte to transfer */ > > -uint32_t byte_count); > > +uint32_t byte_count, > > +bool tmz); > > > > /* maximum bytes in a single operation */ > > uint32_tfill_max_bytes; > > @@ -97,7 +98,7 @@ struct amdgpu_buffer_funcs { > > uint32_t byte_count); > > }; > > > > -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) > > (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) > > +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) > > +(adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), > > +(t)) > > #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) > > (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) > > > > struct amdgpu_sdma_instance * > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > index 339088d..c08c15e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > > @@ -2022,7 +2022,7 @@ static int amdgpu_map_buffer(struct > ttm_buffer_object *bo, > > dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); > > dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; > > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, > > - dst_addr, num_bytes); > > + dst_addr, num_bytes, false); > > > > amdgpu_ring_pad_ib(ring, &job->ibs[0]); > > WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2093,7 +2093,7 > @@ int > > amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > > uint32_t cur_size_in_bytes = min(byte_count, max_bytes); > > > > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, > > - dst_offset, cur_size_in_bytes); > > + dst_offset, cur_size_in_bytes, false); > > > > src_offset += cur_size_in_bytes; > > dst_offset += cur_size_in_bytes; > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > index c45304f..82cdb8f 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > > @@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct > > amdgpu_device *adev) static void cik_sdma_emit_copy_buffer(struct > amdgpu_ib *ib, > > uint64_t src_offset, > > uint64_t dst_offset, > > - uint32_t byte_count) > > + uint32_t byte_count, > > + bool tmz) > > { > > ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, > SDMA_COPY_SUB_OPCODE_LINEAR, 0); > > ib->ptr[ib->length_dw++] = byte_count; diff --git > > a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > index a101758..89e8c74 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdm
Re: [PATCH 01/12] amdgpu: add UAPI for creating encrypted buffers
On Tue, Nov 19, 2019 at 8:52 PM Luben Tuikov wrote: > On 2019-11-14 10:34 p.m., Aaron Liu wrote: > > From: Huang Rui > > > > To align the kernel uapi change from Alex: > > > > "Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers > with > > this flag set will be created with the TMZ bit set in the PTEs or engines > > accessing them. This is required in order to properly access the data > from the > > engines." > > > > We will use GEM_CREATE_ENCRYPTED flag for secure buffer allocation. > > > > Signed-off-by: Huang Rui > > Reviewed-by: Alex Deucher > > --- > > include/drm/amdgpu_drm.h | 5 + > > 1 file changed, 5 insertions(+) > > > > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h > > index 5c28aa7..1a95e37 100644 > > --- a/include/drm/amdgpu_drm.h > > +++ b/include/drm/amdgpu_drm.h > > @@ -141,6 +141,11 @@ extern "C" { > > * releasing the memory > > */ > > #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) > > +/* Flag that BO will be encrypted and that the TMZ bit should be > > + * set in the PTEs when mapping this buffer via GPUVM or > > + * accessing it with various hw blocks > > + */ > > +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) > > Style! > TAB char?! > > You have a TAB char between ".._ENCRYPTED" and "(1 << 10)" > Do NOT add/insert TAB chars instead of space to align colunmns! > If when you press Tab key a tab is inserted, as opposed to the line > indented, then DO NOT use this editor. > The Tab key should "indent according to mode" by inserting TAB chars. > If the line is already indented, as this one is, then it should do nothing. > I disagree with this 100%. Tabs or spaces don't matter here from my perspective. I also disagree with your language. It's overly impolite. Marek ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 06/12] tests/amdgpu: add device handle as input param for exec_cs_helper and write_linear_helper (v4)
On 2019-11-14 10:34 p.m., Aaron Liu wrote: > From: Huang Rui > > This patch is to add add device handle as input param for exec_cs_helper > and write_linear_helper. > > Because they are needed in security tests. > > v2: fix typo that basic tests should be un-secure. > v3: refine the function implementation. > v4: remove amdgpu_cs_ctx_create3 calling. > > Signed-off-by: Huang Rui > Signed-off-by: Aaron Liu > Reviewed-by: Alex Deucher > --- > tests/amdgpu/amdgpu_test.h | 4 +++- > tests/amdgpu/basic_tests.c | 52 > +++--- > 2 files changed, 38 insertions(+), 18 deletions(-) > > diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h > index 67be437..16c2da0 100644 > --- a/tests/amdgpu/amdgpu_test.h > +++ b/tests/amdgpu/amdgpu_test.h > @@ -263,7 +263,9 @@ CU_BOOL suite_security_tests_enable(void); > extern CU_TestInfo security_tests[]; > > extern void > -amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type, > +amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle > + device, > + unsigned ip_type, > bool secure); > Wasn't this already done in patch 5? If yes, then it shouldn't be done here and only a single header file should contain the truth. If no, then why are two header files declaring this extern? Should be only one. Regards, Luben > /** > diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c > index 31c9a54..5154812 100644 > --- a/tests/amdgpu/basic_tests.c > +++ b/tests/amdgpu/basic_tests.c > @@ -1283,12 +1283,14 @@ static void amdgpu_command_submission_compute(void) > * pm4_src, resources, ib_info, and ibs_request > * submit command stream described in ibs_request and wait for this IB > accomplished > */ > -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, > -unsigned ip_type, > -int instance, int pm4_dw, uint32_t > *pm4_src, > -int res_cnt, amdgpu_bo_handle *resources, > -struct amdgpu_cs_ib_info *ib_info, > -struct amdgpu_cs_request *ibs_request) > +static void > +amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle, > +amdgpu_context_handle context_handle, > +unsigned ip_type, int instance, int pm4_dw, > +uint32_t *pm4_src, int res_cnt, > +amdgpu_bo_handle *resources, > +struct amdgpu_cs_ib_info *ib_info, > +struct amdgpu_cs_request *ibs_request) > { > int r; > uint32_t expired; > @@ -1361,8 +1363,24 @@ static void > amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, > CU_ASSERT_EQUAL(r, 0); > } > > -void amdgpu_command_submission_write_linear_helper_with_secure(unsigned > ip_type, > -bool secure) > +static void > +amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, > +unsigned ip_type, int instance, int pm4_dw, > +uint32_t *pm4_src, int res_cnt, > +amdgpu_bo_handle *resources, > +struct amdgpu_cs_ib_info *ib_info, > +struct amdgpu_cs_request *ibs_request) > +{ > + amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, > +ip_type, instance, pm4_dw, pm4_src, > +res_cnt, resources, ib_info, > +ibs_request); > +} > + > +void > +amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle > + device, unsigned > + ip_type, bool secure) > { > const int sdma_write_length = 128; > const int pm4_dw = 256; > @@ -1388,13 +1406,13 @@ void > amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type, > ibs_request = calloc(1, sizeof(*ibs_request)); > CU_ASSERT_NOT_EQUAL(ibs_request, NULL); > > - r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); > + r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); > CU_ASSERT_EQUAL(r, 0); > > for (i = 0; secure && (i < 2); i++) > gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; > > - r = amdgpu_cs_ctx_create(device_handle, &context_handle); > + r = amdgpu_cs_ctx_create(device, &context_handle); > > CU_ASSERT_EQUAL(r, 0); > > @@ -1406,7 +1424,7 @@ void > amdgpu_command_submission_write_linear_helper_wit
Re: [PATCH 05/12] tests/amdgpu: expand write linear helper for security (v3)
On 2019-11-14 10:34 p.m., Aaron Liu wrote: > From: Huang Rui > > This patch expand write linear helper for security to submit the command > with secure context. > > v2: refine the function implementation. > v3: remove amdgpu_cs_ctx_create3. > > Signed-off-by: Huang Rui > Signed-off-by: Aaron Liu > Reviewed-by: Alex Deucher > --- > tests/amdgpu/amdgpu_test.h | 4 > tests/amdgpu/basic_tests.c | 15 +-- > 2 files changed, 17 insertions(+), 2 deletions(-) > > diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h > index b7f8de2..67be437 100644 > --- a/tests/amdgpu/amdgpu_test.h > +++ b/tests/amdgpu/amdgpu_test.h > @@ -262,6 +262,9 @@ CU_BOOL suite_security_tests_enable(void); > */ > extern CU_TestInfo security_tests[]; > > +extern void > +amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type, > + bool secure); > > /** > * Helper functions > @@ -452,4 +455,5 @@ static inline bool asic_is_arcturus(uint32_t asic_id) > } > } > > + > #endif /* #ifdef _AMDGPU_TEST_H_ */ > diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c > index a57dcbb..31c9a54 100644 > --- a/tests/amdgpu/basic_tests.c > +++ b/tests/amdgpu/basic_tests.c > @@ -71,7 +71,7 @@ static void > amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, > int res_cnt, amdgpu_bo_handle *resources, > struct amdgpu_cs_ib_info *ib_info, > struct amdgpu_cs_request *ibs_request); > - > + > CU_TestInfo basic_tests[] = { > { "Query Info Test", amdgpu_query_info_test }, > { "Userptr Test", amdgpu_userptr_test }, > @@ -1361,7 +1361,8 @@ static void > amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, > CU_ASSERT_EQUAL(r, 0); > } > > -static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) > +void amdgpu_command_submission_write_linear_helper_with_secure(unsigned > ip_type, > +bool secure) > { This is an example of bad naming of a function. And it is also very long. Too long. Why does the name need to end with "_secure("? Does it mean that the write is always secure? No! No, it doesn't! If the parameter, in this case the security state, is parameterized, as it is via a function argument, then you don't need to add this also to the name of the function, as you did. amdgpu_command_submission_write_linear_helper(unsigned ip_type, bool secure) is fine for a name. Leave it at that. Regards, Luben > const int sdma_write_length = 128; > const int pm4_dw = 256; > @@ -1390,7 +1391,11 @@ static void > amdgpu_command_submission_write_linear_helper(unsigned ip_type) > r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); > CU_ASSERT_EQUAL(r, 0); > > + for (i = 0; secure && (i < 2); i++) > + gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; > + > r = amdgpu_cs_ctx_create(device_handle, &context_handle); > + > CU_ASSERT_EQUAL(r, 0); > > /* prepare resource */ > @@ -1469,6 +1474,12 @@ static void > amdgpu_command_submission_write_linear_helper(unsigned ip_type) > CU_ASSERT_EQUAL(r, 0); > } > > +static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) > +{ > + amdgpu_command_submission_write_linear_helper_with_secure(ip_type, > + false); > +} > + > static void amdgpu_command_submission_sdma_write_linear(void) > { > amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 01/12] amdgpu: add UAPI for creating encrypted buffers
On 2019-11-14 10:34 p.m., Aaron Liu wrote: > From: Huang Rui > > To align the kernel uapi change from Alex: > > "Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers with > this flag set will be created with the TMZ bit set in the PTEs or engines > accessing them. This is required in order to properly access the data from the > engines." > > We will use GEM_CREATE_ENCRYPTED flag for secure buffer allocation. > > Signed-off-by: Huang Rui > Reviewed-by: Alex Deucher > --- > include/drm/amdgpu_drm.h | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h > index 5c28aa7..1a95e37 100644 > --- a/include/drm/amdgpu_drm.h > +++ b/include/drm/amdgpu_drm.h > @@ -141,6 +141,11 @@ extern "C" { > * releasing the memory > */ > #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) > +/* Flag that BO will be encrypted and that the TMZ bit should be > + * set in the PTEs when mapping this buffer via GPUVM or > + * accessing it with various hw blocks > + */ > +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) Style! TAB char?! You have a TAB char between ".._ENCRYPTED" and "(1 << 10)" Do NOT add/insert TAB chars instead of space to align colunmns! If when you press Tab key a tab is inserted, as opposed to the line indented, then DO NOT use this editor. The Tab key should "indent according to mode" by inserting TAB chars. If the line is already indented, as this one is, then it should do nothing. Don't bunch up definitions together, nor their comments. You need an empty line after #define and before /* Flag that ... */ In comments please use present tense, not future tense! The comment can be improved like: /* Indicates that the BO is encrypted. This will set the TMZ * bit in PTEs when mapping this buffer. */ #define AMDGPU_GEM_CREATE_ENCRYPTED Regards, Luben > > /* Hybrid specific */ > /* Flag that the memory allocation should be from top of domain */ > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: Potential BUG: [PATCH 2/8] drm/amdgpu: add a generic fb accessing helper function(v3)
Hi Iago, Thank you for finding and reporting this potential double lock. Yes indeed, I see it--it can indeed happen. Now, since the primitives used--macros using "amdgpu_mm_(r|w)reg\(.*\)"--in "amdgpu_device_vram_access()" do use their own register-access spinlocks, it maybe wise to remove the spinlock take/release in "amdgpu_device_vram_access()". We'll look into it and possibly submit another patch. Thanks again. Regards, Luben On 2019-11-16 11:21 a.m., Iago Abal wrote: > Hi, > > With the help of a static bug finder (EBA - https://github.com/IagoAbal/eba) > I have found a potential double lock in Linux Next tag next-20191115, file > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c. > > This bug seems to be introduced by commit > e35e2b117f4 ("drm/amdgpu: add a generic fb accessing helper function(v3)"). > > The steps to reproduce it would be: > > 1. Start in function `amdgpu_device_vram_access`. > 2. Enter for-loop `for (last += pos; pos <= last; pos += 4)`. > 3. First lock: `spin_lock_irqsave(&adev->mmio_idx_lock, flags)`. > 4. Call to `WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x8000)`. > 5. Note `#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), > AMDGPU_REGS_NO_KIQ)`. > 6. Continue in function `amdgpu_mm_wreg`. > 7. Take else-branch in the third if-statement. > 8. Double lock: `spin_lock_irqsave(&adev->mmio_idx_lock, flags)`. > > I think the control flow could reach that second lock, but you may know > better. > > Hope it helps! > > -- iago ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL for gfx10
On 2019-11-18 12:18 a.m., Aaron Liu wrote: > This patch enables TMZ bit in FRAME_CONTROL for gfx10. > > Signed-off-by: Aaron Liu > --- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index d6e11ee..8dce067 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -4588,7 +4588,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct > amdgpu_ring *ring, > gfx_v10_0_ring_emit_ce_meta(ring, > flags & AMDGPU_IB_PREEMPTED ? true : false); > > - gfx_v10_0_ring_emit_tmz(ring, true, false); > + gfx_v10_0_ring_emit_tmz(ring, true, trusted); > Do you mean here "true" (the opposite of "false") as opposed to "trusted"? Regards, Luben > dw2 |= 0x8000; /* set load_enable otherwise this package is just > NOPs */ > if (flags & AMDGPU_HAVE_CTX_SWITCH) { > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface with tmz parameter
I wonder if we really do need yet another function argument, thus increasing the argument list, or if the "tmz" boolean can/is already a property of the job/command/ib/etc., and if it can indeed be had from the latter entity...? Regards, Luben On 2019-11-18 12:18 a.m., Aaron Liu wrote: > This patch expands sdma copy_buffer interface with tmz parameter. > > Signed-off-by: Aaron Liu > Reviewed-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 ++- > 8 files changed, 17 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > index 761ff8b..b313465 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > @@ -79,7 +79,8 @@ struct amdgpu_buffer_funcs { >/* dst addr in bytes */ >uint64_t dst_offset, >/* number of byte to transfer */ > - uint32_t byte_count); > + uint32_t byte_count, > + bool tmz); > > /* maximum bytes in a single operation */ > uint32_tfill_max_bytes; > @@ -97,7 +98,7 @@ struct amdgpu_buffer_funcs { >uint32_t byte_count); > }; > > -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) > (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) > +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) > (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) > #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) > (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) > > struct amdgpu_sdma_instance * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 339088d..c08c15e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -2022,7 +2022,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object > *bo, > dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); > dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, > - dst_addr, num_bytes); > + dst_addr, num_bytes, false); > > amdgpu_ring_pad_ib(ring, &job->ibs[0]); > WARN_ON(job->ibs[0].length_dw > num_dw); > @@ -2093,7 +2093,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, > uint64_t src_offset, > uint32_t cur_size_in_bytes = min(byte_count, max_bytes); > > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, > - dst_offset, cur_size_in_bytes); > + dst_offset, cur_size_in_bytes, false); > > src_offset += cur_size_in_bytes; > dst_offset += cur_size_in_bytes; > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > index c45304f..82cdb8f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > @@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device > *adev) > static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > - uint32_t byte_count) > + uint32_t byte_count, > + bool tmz) > { > ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, > SDMA_COPY_SUB_OPCODE_LINEAR, 0); > ib->ptr[ib->length_dw++] = byte_count; > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > index a101758..89e8c74 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > @@ -1200,7 +1200,8 @@ static void sdma_v2_4_set_irq_funcs(struct > amdgpu_device *adev) > static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > -uint32_t byte_count) > +uint32_t byte_count, > +bool tmz) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); > diff --git a/drivers/gpu/drm/amd/amd
Re: [PATCH] drm/dsc: Return unsigned long on compute offset
On 19/11/2019 16:09, Mikita Lipski wrote: On 19/11/2019 12:11, Ville Syrjälä wrote: On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote: If you're going to make all of them the same, then u64, please. This is because I'm not sure if calculations require 64-bit at some stage. If it does then it's already broken. Someone should probably figure out what's actally needed instead of shooting ducks with an icbm. Sorry made a type below. Supposed to be "I don't think it is broken" I don't think it is not broken, cause I'm currently testing DSC. The patch I sent early simply fixes the error of comparing signed and unsigned variables. We can then submit a second patch addressing the issue of using unsigned long int instead of u32. Also, since the variables in drm_dsc_config structure are all of type u8 and u16, the calculation values shouldn't exceed the size of u32. Thanks -Original Message- From: Lipski, Mikita Sent: November 19, 2019 10:08 AM To: Ville Syrjälä ; Lipski, Mikita Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, Nikola Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset On 19/11/2019 09:56, Ville Syrjälä wrote: On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: From: Mikita Lipski We shouldn't compare int with unsigned long to find the max value and since we are not expecting negative value returned from compute_offset we should make this function return unsigned long so we can compare the values when computing rc parameters. Why are there other unsigned longs in dsc parameter computation in the first place? I believe it was initially set to be unsigned long for variable consistency, when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. But now that I look at it, we can actually just set them to u32 or u64, as nothing should exceed that. Cc: Nikola Cornij Cc: Harry Wentland Signed-off-by: Mikita Lipski --- drivers/gpu/drm/drm_dsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index 74f3527f567d..ec40604ab6a2 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, } EXPORT_SYMBOL(drm_dsc_pps_payload_pack); -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, int groups_per_line, int grpcnt) { - int offset = 0; - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); + unsigned long offset = 0; + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); if (grpcnt <= grpcnt_id) offset = DIV_ROUND_UP(grpcnt * pixels_per_group * vdsc_cfg->bits_per_pixel, 16); -- 2.17.1 ___ dri-devel mailing list dri-de...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/dsc: Return unsigned long on compute offset
On 19/11/2019 12:11, Ville Syrjälä wrote: On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote: If you're going to make all of them the same, then u64, please. This is because I'm not sure if calculations require 64-bit at some stage. If it does then it's already broken. Someone should probably figure out what's actally needed instead of shooting ducks with an icbm. I don't think it is not broken, cause I'm currently testing DSC. The patch I sent early simply fixes the error of comparing signed and unsigned variables. We can then submit a second patch addressing the issue of using unsigned long int instead of u32. Also, since the variables in drm_dsc_config structure are all of type u8 and u16, the calculation values shouldn't exceed the size of u32. Thanks -Original Message- From: Lipski, Mikita Sent: November 19, 2019 10:08 AM To: Ville Syrjälä ; Lipski, Mikita Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, Nikola Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset On 19/11/2019 09:56, Ville Syrjälä wrote: On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: From: Mikita Lipski We shouldn't compare int with unsigned long to find the max value and since we are not expecting negative value returned from compute_offset we should make this function return unsigned long so we can compare the values when computing rc parameters. Why are there other unsigned longs in dsc parameter computation in the first place? I believe it was initially set to be unsigned long for variable consistency, when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. But now that I look at it, we can actually just set them to u32 or u64, as nothing should exceed that. Cc: Nikola Cornij Cc: Harry Wentland Signed-off-by: Mikita Lipski --- drivers/gpu/drm/drm_dsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index 74f3527f567d..ec40604ab6a2 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, } EXPORT_SYMBOL(drm_dsc_pps_payload_pack); -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, int groups_per_line, int grpcnt) { - int offset = 0; - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); + unsigned long offset = 0; + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); if (grpcnt <= grpcnt_id) offset = DIV_ROUND_UP(grpcnt * pixels_per_group * vdsc_cfg->bits_per_pixel, 16); -- 2.17.1 ___ dri-devel mailing list dri-de...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/display: add default clocks if not able to fetch them
dm_pp_get_clock_levels_by_type needs to add the default clocks to the powerplay case as well. This was accidently dropped. Fixes: b3ea88fef321de ("drm/amd/powerplay: add get_clock_by_type interface for display") Bug: https://gitlab.freedesktop.org/drm/amd/issues/906 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 229788bee544..a2e1a73f66b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type( if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) { if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { - /* Error in pplib. Provide default values. */ + /* Error in pplib. Provide default values. */ + get_default_clock_levels(clk_type, dc_clks); return true; } } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) { -- 2.23.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault
Am 19.11.19 um 17:45 schrieb Felix Kuehling: On 2019-11-19 11:37, Alex Sierra wrote: Only for the debugger use case. [why] Avoid endless translation retries, after an invalid address access has been issued to the GPU. Instead, the trap handler is forced to enter by generating a no-retry-fault. A s_trap instruction is inserted in the debugger case to let the wave to enter trap handler to save context. [how] Intentionally using an invalid flag combination (F and P set at the same time) to trigger a no-retry-fault, after a retry-fault happens. This is only valid under compute context. Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a Signed-off-by: Alex Sierra This commit adds some unnecessary empty lines. See inline. With that fixed, the series is Reviewed-by: Felix Kuehling I actually like the empty lines, nicely emphases that a new block starts. Maybe note in the code comment that the flags combination is intentionally invalid as well. But either way the series is Reviewed-by: Christian König . Regards, Christian. Please also give Christian a chance to review. Thanks, Felix --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d51ac8771ae0..cd36195ff8be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + if (vm->is_compute_context) { + /* Setting PTE flags to trigger a no-retry-fault */ + flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | + AMDGPU_PTE_TF; + value = 0; + Unnecessary blank line. + } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE; + Unnecessary blank line. } else { /* Let the hw retry silently on the PTE */ value = 0; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v3 12/14] drm/amdgpu: Use mmu_interval_notifier instead of hmm_mirror
I test v3 and it works fine. Regards, Philip On 2019-11-12 3:22 p.m., Jason Gunthorpe wrote: From: Jason Gunthorpe Convert the collision-retry lock around hmm_range_fault to use the one now provided by the mmu_interval notifier. Although this driver does not seem to use the collision retry lock that hmm provides correctly, it can still be converted over to use the mmu_interval_notifier api instead of hmm_mirror without too much trouble. This also deletes another place where a driver is associating additional data (struct amdgpu_mn) with a mmu_struct. Signed-off-by: Philip Yang Reviewed-by: Philip Yang Tested-by: Philip Yang Signed-off-by: Jason Gunthorpe --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 148 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 49 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 116 -- 5 files changed, 94 insertions(+), 237 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 47700302a08b7f..1bcedb9b477dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1738,6 +1738,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return ret; } + /* +* FIXME: Cannot ignore the return code, must hold +* notifier_lock +*/ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82823d9a8ba887..22c989bca7514c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -603,8 +603,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -1287,11 +1285,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. -* p->mn is hold until amdgpu_cs_submit is finished and fence is added -* to BOs. + /* No memory allocation is allowed while holding the notifier lock. +* The lock is held until amdgpu_cs_submit is finished and fence is +* added to BOs. */ - amdgpu_mn_lock(p->mn); + mutex_lock(&p->adev->notifier_lock); /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. @@ -1334,13 +1332,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 9fe1c31ce17a30..828b5167ff128f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -50,28 +50,6 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - /** * amdgpu_mn_invalidate_gfx - callback to notify about mm change * @@ -94,6 +72,9 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, return false; mutex_lock(&adev->notifier_lock); + + mmu_interval_set_seq(mni, cur_seq); + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); @@ -127,6 +108,9 @@ static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, return false; mutex_lock(&adev->notifier_lock); + + mmu_interval_set_seq(mni, cur_seq); + amdgpu_amdkfd_evict_user
Re: [PATCH] drm/dsc: Return unsigned long on compute offset
On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote: > If you're going to make all of them the same, then u64, please. > > This is because I'm not sure if calculations require 64-bit at some stage. If it does then it's already broken. Someone should probably figure out what's actally needed instead of shooting ducks with an icbm. > > -Original Message- > From: Lipski, Mikita > Sent: November 19, 2019 10:08 AM > To: Ville Syrjälä ; Lipski, Mikita > > Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, > Nikola > Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset > > > > On 19/11/2019 09:56, Ville Syrjälä wrote: > > On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: > >> From: Mikita Lipski > >> > >> We shouldn't compare int with unsigned long to find the max value and > >> since we are not expecting negative value returned from > >> compute_offset we should make this function return unsigned long so > >> we can compare the values when computing rc parameters. > > > > Why are there other unsigned longs in dsc parameter computation in the > > first place? > > I believe it was initially set to be unsigned long for variable consistency, > when we ported intel_compute_rc_parameters into > drm_dsc_compute_rc_parameters. But now that I look at it, we can actually > just set them to u32 or u64, as nothing should exceed that. > > > >> > >> Cc: Nikola Cornij > >> Cc: Harry Wentland > >> Signed-off-by: Mikita Lipski > >> --- > >> drivers/gpu/drm/drm_dsc.c | 6 +++--- > >> 1 file changed, 3 insertions(+), 3 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c > >> index 74f3527f567d..ec40604ab6a2 100644 > >> --- a/drivers/gpu/drm/drm_dsc.c > >> +++ b/drivers/gpu/drm/drm_dsc.c > >> @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct > >> drm_dsc_picture_parameter_set *pps_payload, > >> } > >> EXPORT_SYMBOL(drm_dsc_pps_payload_pack); > >> > >> -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int > >> pixels_per_group, > >> +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int > >> pixels_per_group, > >>int groups_per_line, int grpcnt) > >> { > >> - int offset = 0; > >> - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, > >> pixels_per_group); > >> + unsigned long offset = 0; > >> + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, > >> pixels_per_group); > >> > >>if (grpcnt <= grpcnt_id) > >>offset = DIV_ROUND_UP(grpcnt * pixels_per_group * > >> vdsc_cfg->bits_per_pixel, 16); > >> -- > >> 2.17.1 > >> > >> ___ > >> dri-devel mailing list > >> dri-de...@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel > > > > -- > Thanks, > Mikita Lipski > Software Engineer 2, AMD > mikita.lip...@amd.com -- Ville Syrjälä Intel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/dsc: Return unsigned long on compute offset
If you're going to make all of them the same, then u64, please. This is because I'm not sure if calculations require 64-bit at some stage. -Original Message- From: Lipski, Mikita Sent: November 19, 2019 10:08 AM To: Ville Syrjälä ; Lipski, Mikita Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, Nikola Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset On 19/11/2019 09:56, Ville Syrjälä wrote: > On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: >> From: Mikita Lipski >> >> We shouldn't compare int with unsigned long to find the max value and >> since we are not expecting negative value returned from >> compute_offset we should make this function return unsigned long so >> we can compare the values when computing rc parameters. > > Why are there other unsigned longs in dsc parameter computation in the > first place? I believe it was initially set to be unsigned long for variable consistency, when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. But now that I look at it, we can actually just set them to u32 or u64, as nothing should exceed that. > >> >> Cc: Nikola Cornij >> Cc: Harry Wentland >> Signed-off-by: Mikita Lipski >> --- >> drivers/gpu/drm/drm_dsc.c | 6 +++--- >> 1 file changed, 3 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c >> index 74f3527f567d..ec40604ab6a2 100644 >> --- a/drivers/gpu/drm/drm_dsc.c >> +++ b/drivers/gpu/drm/drm_dsc.c >> @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct >> drm_dsc_picture_parameter_set *pps_payload, >> } >> EXPORT_SYMBOL(drm_dsc_pps_payload_pack); >> >> -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int >> pixels_per_group, >> +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int >> pixels_per_group, >> int groups_per_line, int grpcnt) >> { >> -int offset = 0; >> -int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, >> pixels_per_group); >> +unsigned long offset = 0; >> +unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, >> pixels_per_group); >> >> if (grpcnt <= grpcnt_id) >> offset = DIV_ROUND_UP(grpcnt * pixels_per_group * >> vdsc_cfg->bits_per_pixel, 16); >> -- >> 2.17.1 >> >> ___ >> dri-devel mailing list >> dri-de...@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel > -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault
On 2019-11-19 11:37, Alex Sierra wrote: Only for the debugger use case. [why] Avoid endless translation retries, after an invalid address access has been issued to the GPU. Instead, the trap handler is forced to enter by generating a no-retry-fault. A s_trap instruction is inserted in the debugger case to let the wave to enter trap handler to save context. [how] Intentionally using an invalid flag combination (F and P set at the same time) to trigger a no-retry-fault, after a retry-fault happens. This is only valid under compute context. Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a Signed-off-by: Alex Sierra This commit adds some unnecessary empty lines. See inline. With that fixed, the series is Reviewed-by: Felix Kuehling Please also give Christian a chance to review. Thanks, Felix --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d51ac8771ae0..cd36195ff8be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + if (vm->is_compute_context) { + /* Setting PTE flags to trigger a no-retry-fault */ + flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | + AMDGPU_PTE_TF; + value = 0; + Unnecessary blank line. + } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE; + Unnecessary blank line. } else { /* Let the hw retry silently on the PTE */ value = 0; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: add flag to indicate amdgpu vm context
Flag added to indicate if the amdgpu vm context is used for compute or graphics. Change-Id: Ia813037fda2ec2947d73f5c7328388078fbeebe5 Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3c0bd6472a46..d51ac8771ae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2713,6 +2713,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error_free_direct; vm->pte_support_ats = false; + vm->is_compute_context = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2898,6 +2899,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs = &amdgpu_vm_sdma_funcs; dma_fence_put(vm->last_update); vm->last_update = NULL; + vm->is_compute_context = true; if (vm->pasid) { unsigned long flags; @@ -2952,6 +2954,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } vm->pasid = 0; + vm->is_compute_context = false; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 5cb25c1c54e0..76fcf853035c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -301,6 +301,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ boolbulk_moveable; + /* Flag to indicate if VM is used for compute */ + boolis_compute_context; }; struct amdgpu_vm_manager { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault
Only for the debugger use case. [why] Avoid endless translation retries, after an invalid address access has been issued to the GPU. Instead, the trap handler is forced to enter by generating a no-retry-fault. A s_trap instruction is inserted in the debugger case to let the wave to enter trap handler to save context. [how] Intentionally using an invalid flag combination (F and P set at the same time) to trigger a no-retry-fault, after a retry-fault happens. This is only valid under compute context. Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d51ac8771ae0..cd36195ff8be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + if (vm->is_compute_context) { + /* Setting PTE flags to trigger a no-retry-fault */ + flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | + AMDGPU_PTE_TF; + value = 0; + + } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE; + } else { /* Let the hw retry silently on the PTE */ value = 0; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/dsc: Return unsigned long on compute offset
On 19/11/2019 09:56, Ville Syrjälä wrote: On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: From: Mikita Lipski We shouldn't compare int with unsigned long to find the max value and since we are not expecting negative value returned from compute_offset we should make this function return unsigned long so we can compare the values when computing rc parameters. Why are there other unsigned longs in dsc parameter computation in the first place? I believe it was initially set to be unsigned long for variable consistency, when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. But now that I look at it, we can actually just set them to u32 or u64, as nothing should exceed that. Cc: Nikola Cornij Cc: Harry Wentland Signed-off-by: Mikita Lipski --- drivers/gpu/drm/drm_dsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index 74f3527f567d..ec40604ab6a2 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, } EXPORT_SYMBOL(drm_dsc_pps_payload_pack); -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, int groups_per_line, int grpcnt) { - int offset = 0; - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); + unsigned long offset = 0; + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); if (grpcnt <= grpcnt_id) offset = DIV_ROUND_UP(grpcnt * pixels_per_group * vdsc_cfg->bits_per_pixel, 16); -- 2.17.1 ___ dri-devel mailing list dri-de...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Thanks, Mikita Lipski Software Engineer 2, AMD mikita.lip...@amd.com ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/dsc: Return unsigned long on compute offset
On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote: > From: Mikita Lipski > > We shouldn't compare int with unsigned long to find the max value > and since we are not expecting negative value returned from > compute_offset we should make this function return unsigned long > so we can compare the values when computing rc parameters. Why are there other unsigned longs in dsc parameter computation in the first place? > > Cc: Nikola Cornij > Cc: Harry Wentland > Signed-off-by: Mikita Lipski > --- > drivers/gpu/drm/drm_dsc.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c > index 74f3527f567d..ec40604ab6a2 100644 > --- a/drivers/gpu/drm/drm_dsc.c > +++ b/drivers/gpu/drm/drm_dsc.c > @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct > drm_dsc_picture_parameter_set *pps_payload, > } > EXPORT_SYMBOL(drm_dsc_pps_payload_pack); > > -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int > pixels_per_group, > +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int > pixels_per_group, > int groups_per_line, int grpcnt) > { > - int offset = 0; > - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, > pixels_per_group); > + unsigned long offset = 0; > + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, > pixels_per_group); > > if (grpcnt <= grpcnt_id) > offset = DIV_ROUND_UP(grpcnt * pixels_per_group * > vdsc_cfg->bits_per_pixel, 16); > -- > 2.17.1 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Ville Syrjälä Intel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/dsc: Return unsigned long on compute offset
From: Mikita Lipski We shouldn't compare int with unsigned long to find the max value and since we are not expecting negative value returned from compute_offset we should make this function return unsigned long so we can compare the values when computing rc parameters. Cc: Nikola Cornij Cc: Harry Wentland Signed-off-by: Mikita Lipski --- drivers/gpu/drm/drm_dsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index 74f3527f567d..ec40604ab6a2 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, } EXPORT_SYMBOL(drm_dsc_pps_payload_pack); -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group, int groups_per_line, int grpcnt) { - int offset = 0; - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); + unsigned long offset = 0; + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group); if (grpcnt <= grpcnt_id) offset = DIV_ROUND_UP(grpcnt * pixels_per_group * vdsc_cfg->bits_per_pixel, 16); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 3/3] drm/amdgpu: invalidate semphore workaround in gmc9/gmc10
Am 19.11.19 um 10:13 schrieb Changfeng.Zhu: From: changzhu It may lose gpuvm invalidate acknowldege state across power-gating off cycle. To avoid this issue in gmc9/gmc10 invalidation, add semaphore acquire before invalidation and semaphore release after invalidation. After adding semaphore acquire before invalidation, the semaphore register become read-only if another process try to acquire semaphore. Then it will not be able to release this semaphore. Then it may cause deadlock problem. If this deadlock problem happens, it needs a semaphore firmware fix. Change-Id: I9942a2f451265c1f1038ccfe2f70042c7c8118af Signed-off-by: changzhu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 45 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 45 ++ drivers/gpu/drm/amd/amdgpu/soc15.h | 4 +-- 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index af2615ba52aa..c47a163b88b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -234,6 +234,24 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -253,6 +271,14 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -338,6 +364,19 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + + DRM_WARN_ONCE("Adding semaphore may cause deadlock and it needs firmware fix\n"); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -348,6 +387,12 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1ae59af7836a..cfef219ced99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -456,6 +456,24 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -
Re: [PATCH 2/3] drm/amdgpu: invalidate semphore workaround in amdgpu_virt
Am 19.11.19 um 10:12 schrieb Changfeng.Zhu: From: changzhu It may lose gpuvm invalidate acknowldege state across power-gating off cycle. To avoid this issue in virt invalidation, add semaphore acquire before invalidation and semaphore release after invalidation. Change-Id: Ie98304e475166b53eed033462d76423b6b0fc25b Signed-off-by: changzhu Still needs more cleanup, but for now that patch is Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 22 -- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 3 ++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f04eb1a64271..ee576158545e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -135,7 +135,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t sem) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; @@ -144,9 +145,26 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t seq; spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + amdgpu_ring_alloc(ring, 60); + + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, sem, 0x1, 0x1); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + amdgpu_ring_emit_wreg(ring, sem, 0); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b0b2bdc750df..bda6a2f37dc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -295,7 +295,8 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t sem); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f25cd97ba5f2..1ae59af7836a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -448,9 +448,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, !adev->in_gpu_reset) { uint32_t req = hub->vm_inv_eng0_req + eng; uint32_t ack = hub->vm_inv_eng0_ack + eng; + uint32_t sem = hub->vm_inv_eng0_sem + eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); + 1 << vmid, sem); return; } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/3] drm/amdgpu: initialize vm_inv_eng0_sem for gfxhub and mmhub
Am 19.11.19 um 10:11 schrieb Changfeng.Zhu: From: changzhu SW must acquire/release one of the vm_invalidate_eng*_sem around the invalidation req/ack. Through this way,it can avoid losing invalidate acknowledge state across power-gating off cycle. To use vm_invalidate_eng*_sem, it needs to initialize vm_invalidate_eng*_sem firstly. Change-Id: Ic7abf481b08df085c326a98eba4b00d78f33560c Signed-off-by: changzhu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 4 6 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 406736a1bd3d..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_tctx0_ptb_addr_lo32; uint32_tctx0_ptb_addr_hi32; + uint32_tvm_inv_eng0_sem; uint32_tvm_inv_eng0_req; uint32_tvm_inv_eng0_ack; uint32_tvm_context0_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 2c5adfe803a2..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, +mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + +i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + ___
[PATCH 3/3] drm/amdgpu: invalidate semphore workaround in gmc9/gmc10
From: changzhu It may lose gpuvm invalidate acknowldege state across power-gating off cycle. To avoid this issue in gmc9/gmc10 invalidation, add semaphore acquire before invalidation and semaphore release after invalidation. After adding semaphore acquire before invalidation, the semaphore register become read-only if another process try to acquire semaphore. Then it will not be able to release this semaphore. Then it may cause deadlock problem. If this deadlock problem happens, it needs a semaphore firmware fix. Change-Id: I9942a2f451265c1f1038ccfe2f70042c7c8118af Signed-off-by: changzhu --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 45 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 45 ++ drivers/gpu/drm/amd/amdgpu/soc15.h | 4 +-- 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index af2615ba52aa..c47a163b88b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -234,6 +234,24 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -253,6 +271,14 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -338,6 +364,19 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + + DRM_WARN_ONCE("Adding semaphore may cause deadlock and it needs firmware fix\n"); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -348,6 +387,12 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1ae59af7836a..cfef219ced99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -456,6 +456,24 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -471,7 +489,15 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdg
[PATCH 2/3] drm/amdgpu: invalidate semphore workaround in amdgpu_virt
From: changzhu It may lose gpuvm invalidate acknowldege state across power-gating off cycle. To avoid this issue in virt invalidation, add semaphore acquire before invalidation and semaphore release after invalidation. Change-Id: Ie98304e475166b53eed033462d76423b6b0fc25b Signed-off-by: changzhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 22 -- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 3 ++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f04eb1a64271..ee576158545e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -135,7 +135,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t sem) { struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; @@ -144,9 +145,26 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t seq; spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + amdgpu_ring_alloc(ring, 60); + + /* +* It may lose gpuvm invalidate acknowldege state across power-gating +* off cycle, add semaphore acquire before invalidation and semaphore +* release after invalidation to avoid entering power gated state +* to WA the Issue +*/ + + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, sem, 0x1, 0x1); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); + /* +* add semaphore release after invalidation, +* write with 0 means semaphore release +*/ + amdgpu_ring_emit_wreg(ring, sem, 0); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b0b2bdc750df..bda6a2f37dc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -295,7 +295,8 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t sem); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f25cd97ba5f2..1ae59af7836a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -448,9 +448,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, !adev->in_gpu_reset) { uint32_t req = hub->vm_inv_eng0_req + eng; uint32_t ack = hub->vm_inv_eng0_ack + eng; + uint32_t sem = hub->vm_inv_eng0_sem + eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); + 1 << vmid, sem); return; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdgpu: initialize vm_inv_eng0_sem for gfxhub and mmhub
From: changzhu SW must acquire/release one of the vm_invalidate_eng*_sem around the invalidation req/ack. Through this way,it can avoid losing invalidate acknowledge state across power-gating off cycle. To use vm_invalidate_eng*_sem, it needs to initialize vm_invalidate_eng*_sem firstly. Change-Id: Ic7abf481b08df085c326a98eba4b00d78f33560c Signed-off-by: changzhu --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 4 6 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 406736a1bd3d..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_tctx0_ptb_addr_lo32; uint32_tctx0_ptb_addr_hi32; + uint32_tvm_inv_eng0_sem; uint32_tvm_inv_eng0_req; uint32_tvm_inv_eng0_ack; uint32_tvm_context0_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 2c5adfe803a2..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, +mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + +i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedeskto
RE: [PATCH v2] drm/scheduler: Avoid accessing freed bad job.
Tested-by: Emily Deng >-Original Message- >From: Andrey Grodzovsky >Sent: Tuesday, November 19, 2019 1:52 AM >Cc: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, >Christian ; Deng, Emily >; Grodzovsky, Andrey > >Subject: [PATCH v2] drm/scheduler: Avoid accessing freed bad job. > >Problem: >Due to a race between drm_sched_cleanup_jobs in sched thread and >drm_sched_job_timedout in timeout work there is a possiblity that bad job >was already freed while still being accessed from the timeout thread. > >Fix: >Instead of just peeking at the bad job in the mirror list remove it from the >list >under lock and then put it back later when we are garanteed no race with >main sched thread is possible which is after the thread is parked. > >v2: Lock around processing ring_mirror_list in drm_sched_cleanup_jobs. > >Signed-off-by: Andrey Grodzovsky >--- > drivers/gpu/drm/scheduler/sched_main.c | 44 >+- > 1 file changed, 38 insertions(+), 6 deletions(-) > >diff --git a/drivers/gpu/drm/scheduler/sched_main.c >b/drivers/gpu/drm/scheduler/sched_main.c >index 80ddbdf..b05b210 100644 >--- a/drivers/gpu/drm/scheduler/sched_main.c >+++ b/drivers/gpu/drm/scheduler/sched_main.c >@@ -287,10 +287,24 @@ static void drm_sched_job_timedout(struct >work_struct *work) > unsigned long flags; > > sched = container_of(work, struct drm_gpu_scheduler, >work_tdr.work); >+ >+ /* >+ * Protects against concurrent deletion in drm_sched_cleanup_jobs >that >+ * is already in progress. >+ */ >+ spin_lock_irqsave(&sched->job_list_lock, flags); > job = list_first_entry_or_null(&sched->ring_mirror_list, > struct drm_sched_job, node); > > if (job) { >+ /* >+ * Remove the bad job so it cannot be freed by already in >progress >+ * drm_sched_cleanup_jobs. It will be reinsrted back after >sched->thread >+ * is parked at which point it's safe. >+ */ >+ list_del_init(&job->node); >+ spin_unlock_irqrestore(&sched->job_list_lock, flags); >+ > job->sched->ops->timedout_job(job); > > /* >@@ -302,6 +316,8 @@ static void drm_sched_job_timedout(struct >work_struct *work) > sched->free_guilty = false; > } > } >+ else >+ spin_unlock_irqrestore(&sched->job_list_lock, flags); > > spin_lock_irqsave(&sched->job_list_lock, flags); > drm_sched_start_timeout(sched); >@@ -373,6 +389,19 @@ void drm_sched_stop(struct drm_gpu_scheduler >*sched, struct drm_sched_job *bad) > kthread_park(sched->thread); > > /* >+ * Reinsert back the bad job here - now it's safe as >drm_sched_cleanup_jobs >+ * cannot race against us and release the bad job at this point - we >parked >+ * (waited for) any in progress (earlier) cleanups and any later ones >will >+ * bail out due to sched->thread being parked. >+ */ >+ if (bad && bad->sched == sched) >+ /* >+ * Add at the head of the queue to reflect it was the earliest >+ * job extracted. >+ */ >+ list_add(&bad->node, &sched->ring_mirror_list); >+ >+ /* >* Iterate the job list from later to earlier one and either deactive >* their HW callbacks or remove them from mirror list if they already >* signaled. >@@ -656,16 +685,19 @@ static void drm_sched_cleanup_jobs(struct >drm_gpu_scheduler *sched) > __kthread_should_park(sched->thread)) > return; > >- >- while (!list_empty(&sched->ring_mirror_list)) { >+ /* See drm_sched_job_timedout for why the locking is here */ >+ while (true) { > struct drm_sched_job *job; > >- job = list_first_entry(&sched->ring_mirror_list, >- struct drm_sched_job, node); >- if (!dma_fence_is_signaled(&job->s_fence->finished)) >+ spin_lock_irqsave(&sched->job_list_lock, flags); >+ job = list_first_entry_or_null(&sched->ring_mirror_list, >+ struct drm_sched_job, node); >+ >+ if (!job || !dma_fence_is_signaled(&job->s_fence->finished)) { >+ spin_unlock_irqrestore(&sched->job_list_lock, flags); > break; >+ } > >- spin_lock_irqsave(&sched->job_list_lock, flags); > /* remove job from ring_mirror_list */ > list_del_init(&job->node); > spin_unlock_irqrestore(&sched->job_list_lock, flags); >-- >2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH v2 1/1] drm: Prefer pcie_capability_read_word()
Hi, Alex Deucher wrote on 11/18/19 11:42 AM: On Mon, Nov 18, 2019 at 3:37 AM Frederick Lawler wrote: Commit 8c0d3a02c130 ("PCI: Add accessors for PCI Express Capability") added accessors for the PCI Express Capability so that drivers didn't need to be aware of differences between v1 and v2 of the PCI Express Capability. Replace pci_read_config_word() and pci_write_config_word() calls with pcie_capability_read_word() and pcie_capability_write_word(). Signed-off-by: Frederick Lawler --- V2 - Squash both drm commits into one - Rebase ontop of d46eac1e658b --- drivers/gpu/drm/amd/amdgpu/cik.c | 63 --- drivers/gpu/drm/amd/amdgpu/si.c | 71 +++ drivers/gpu/drm/radeon/cik.c | 70 ++ drivers/gpu/drm/radeon/si.c | 73 Can you split this into two patches? One for amdgpu and one for radeon? Thanks! Sure thing! I also realize I didn't say where the magical commit ref came from. It came from Bjorns pci/misc tree. I'll get that out to you Wednesday. Alex 4 files changed, 174 insertions(+), 103 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 3067bb874032..f369e3408ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1384,7 +1384,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) static void cik_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; - int bridge_pos, gpu_pos; u32 speed_cntl, current_data_rate; int i; u16 tmp16; @@ -1419,12 +1418,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n"); } - bridge_pos = pci_pcie_cap(root); - if (!bridge_pos) - return; - - gpu_pos = pci_pcie_cap(adev->pdev); - if (!gpu_pos) + if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev)) return; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { @@ -1434,14 +1428,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, + &gpu_cfg); tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); + pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, + tmp16); tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> @@ -1465,15 +1462,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) for (i = 0; i < 10; i++) { /* check status */ - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); + pcie_capability_read_word(adev->pdev, + PCI_EXP_DEVSTA, + &tmp16); if (tmp16 & PCI_EXP_DEVSTA_TRPND) break; - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); + pcie_capability_read_word(root, PCI_EXP_LNKCTL, + &bridge_cfg); + pcie_capability_read_word(adev->pdev, + PCI_EXP_LNKCTL, + &gpu_cfg); - pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); - pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); + pcie