[PATCH] drm/amdgpu/gfx10: fix out-of-bound mqd_backup array access

2019-11-19 Thread Xiaojie Yuan
Fixes: 4990f957c845 ("drm/amdgpu/gfx10: fix mqd backup/restore for gfx rings")
Signed-off-by: Xiaojie Yuan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 --
 1 file changed, 2 deletions(-)
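
For context, a minimal illustration of the kind of out-of-bounds access this
patch removes. The array size below is a placeholder, not the driver's actual
declaration; the point is only that if me.mqd_backup is dimensioned with
AMDGPU_MAX_GFX_RINGS entries, then index AMDGPU_MAX_GFX_RINGS is one element
past the end:

#define AMDGPU_MAX_GFX_RINGS 2                  /* placeholder value          */

void *mqd_backup[AMDGPU_MAX_GFX_RINGS];         /* valid indices: 0 and 1     */

/* kfree(mqd_backup[AMDGPU_MAX_GFX_RINGS]);        reads mqd_backup[2] -- OOB */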

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a492174ef29b..52c27e49bc7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
 
ring = &adev->gfx.kiq.ring;
-   if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)
-   kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
  &ring->mqd_gpu_addr,
-- 
2.20.1


[PATCH] drm/amdgpu/gfx10: re-init clear state buffer after gpu reset

2019-11-19 Thread Xiaojie Yuan
This patch fixes the 2nd BACO reset failure with gfxoff enabled on navi1x.

The clear state buffer (which resides in VRAM) is corrupted after the 1st BACO
reset; upon gfxoff exit, CPF reads a garbage header from the CSIB and hangs.

Signed-off-by: Xiaojie Yuan 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 43 ++
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9274bd4b6c68..8e24ea08ca39 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1789,27 +1789,52 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct 
amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }
 
-static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
 {
+   int r;
+
+   if (adev->in_gpu_reset) {
+   r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+   if (r)
+   return r;
+
+   r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj,
+  (void **)&adev->gfx.rlc.cs_ptr);
+   if (!r) {
+   adev->gfx.rlc.funcs->get_csb_buffer(adev,
+   adev->gfx.rlc.cs_ptr);
+   amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+   }
+
+   amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+   if (r)
+   return r;
+   }
+
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
 adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
 adev->gfx.rlc.clear_state_gpu_addr & 0xfffc);
WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+   return 0;
 }
 
-static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
+static int gfx_v10_0_init_pg(struct amdgpu_device *adev)
 {
int i;
+   int r;
 
-   gfx_v10_0_init_csb(adev);
+   r = gfx_v10_0_init_csb(adev);
+   if (r)
+   return r;
 
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
/* TODO: init power gating */
-   return;
+   return 0;
 }
 
 void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
@@ -1911,7 +1936,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device 
*adev)
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
-   gfx_v10_0_init_pg(adev);
+
+   r = gfx_v10_0_init_pg(adev);
+   if (r)
+   return r;
 
/* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
@@ -1937,7 +1965,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device 
*adev)
return r;
}
 
-   gfx_v10_0_init_pg(adev);
+   r = gfx_v10_0_init_pg(adev);
+   if (r)
+   return r;
+
adev->gfx.rlc.funcs->start(adev);
 
if (adev->firmware.load_type == 
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
-- 
2.20.1


RE: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL for gfx10

2019-11-19 Thread Liu, Aaron


BR,
Aaron Liu

> -Original Message-
> From: Tuikov, Luben 
> Sent: Wednesday, November 20, 2019 7:52 AM
> To: Liu, Aaron ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Huang, Ray
> ; Koenig, Christian 
> Subject: Re: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL
> for gfx10
> 
> On 2019-11-18 12:18 a.m., Aaron Liu wrote:
> > This patch enables TMZ bit in FRAME_CONTROL for gfx10.
> >
> > Signed-off-by: Aaron Liu 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index d6e11ee..8dce067 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -4588,7 +4588,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct
> amdgpu_ring *ring,
> > gfx_v10_0_ring_emit_ce_meta(ring,
> > flags & AMDGPU_IB_PREEMPTED ? true :
> false);
> >
> > -   gfx_v10_0_ring_emit_tmz(ring, true, false);
> > +   gfx_v10_0_ring_emit_tmz(ring, true, trusted);
> >
> 
> Do you mean here "true" (the opposite of "false") as opposed to "trusted"?
> 
Here we need to consider both the non-TMZ and TMZ cases. The trusted value is
decided by cs.in.flags in amdgpu_cs_submit* from libdrm.
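
To make that concrete, a rough sketch of the idea (the flag name and helper
below are illustrative placeholders, not the actual amdgpu UAPI or driver
symbols; only gfx_v10_0_ring_emit_tmz() is taken from the patch):

/* Illustrative only -- not a real amdgpu UAPI flag. */
#define EXAMPLE_SECURE_SUBMISSION_FLAG  (1ULL << 0)

/* "trusted" is derived per submission from the flags userspace passes via
 * amdgpu_cs_submit*() in libdrm, so the emit call becomes
 *
 *     gfx_v10_0_ring_emit_tmz(ring, true, trusted);
 *
 * instead of hard-coding false for the last argument.
 */
static bool example_cs_flags_to_trusted(uint64_t cs_in_flags)
{
        return !!(cs_in_flags & EXAMPLE_SECURE_SUBMISSION_FLAG);
}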

> Regards,
> Luben
> 
> > dw2 |= 0x8000; /* set load_enable otherwise this package is just
> NOPs */
> > if (flags & AMDGPU_HAVE_CTX_SWITCH) {
> >


RE: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface with tmz parameter

2019-11-19 Thread Liu, Aaron


BR,
Aaron Liu

> -Original Message-
> From: Tuikov, Luben 
> Sent: Wednesday, November 20, 2019 7:12 AM
> To: Liu, Aaron ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Huang, Ray
> ; Koenig, Christian 
> Subject: Re: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface
> with tmz parameter
> 
> I wonder if we really do need yet another function argument, thus increasing
> the argument list, or if the "tmz" boolean can/is already a property of the
> job/command/ib/etc., and if it can indeed be had from the latter entity...?
> 
Hi Luben,
In fact, I also thought about it. Compared to adding this as a property of
other entities, an explicit argument is perhaps clearer and simpler. Another
reason is that TMZ is a relatively independent property.
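
For illustration, the two shapes being weighed (all names below are simplified
placeholders, not the actual amdgpu structures or functions):

/* (a) Property-of-the-job approach suggested in the review: the flag travels
 *     with the IB/job and is read wherever it is needed.
 */
struct example_ib {
        bool tmz;
        /* ... descriptor fields ... */
};

static bool example_ib_is_tmz(const struct example_ib *ib)
{
        return ib->tmz;
}

/* (b) Approach taken by the patch: the flag stays an explicit parameter of
 *     the copy interface, e.g.
 *
 *         amdgpu_emit_copy_buffer(adev, ib, src, dst, bytes, false);
 *
 *     which keeps TMZ visible at every call site as an independent property
 *     of the transfer.
 */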

> Regards,
> Luben
> 
> On 2019-11-18 12:18 a.m., Aaron Liu wrote:
> > This patch expands sdma copy_buffer interface with tmz parameter.
> >
> > Signed-off-by: Aaron Liu 
> > Reviewed-by: Christian König 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +++--
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 4 ++--
> >  drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 3 ++-
> >  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   | 3 ++-
> >  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 3 ++-
> >  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 3 ++-
> >  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 3 ++-
> >  drivers/gpu/drm/amd/amdgpu/si_dma.c  | 3 ++-
> >  8 files changed, 17 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > index 761ff8b..b313465 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> > @@ -79,7 +79,8 @@ struct amdgpu_buffer_funcs {
> >  /* dst addr in bytes */
> >  uint64_t dst_offset,
> >  /* number of byte to transfer */
> > -uint32_t byte_count);
> > +uint32_t byte_count,
> > +bool tmz);
> >
> > /* maximum bytes in a single operation */
> > uint32_tfill_max_bytes;
> > @@ -97,7 +98,7 @@ struct amdgpu_buffer_funcs {
> >  uint32_t byte_count);
> >  };
> >
> > -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b)
> > (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
> > +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t)
> > +(adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b),
> > +(t))
> >  #define amdgpu_emit_fill_buffer(adev, ib, s, d, b)
> > (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
> >
> >  struct amdgpu_sdma_instance *
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 339088d..c08c15e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -2022,7 +2022,7 @@ static int amdgpu_map_buffer(struct
> ttm_buffer_object *bo,
> > dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
> > dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
> > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
> > -   dst_addr, num_bytes);
> > +   dst_addr, num_bytes, false);
> >
> > amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> > WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2093,7 +2093,7
> @@ int
> > amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> > uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
> >
> > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
> > -   dst_offset, cur_size_in_bytes);
> > +   dst_offset, cur_size_in_bytes, false);
> >
> > src_offset += cur_size_in_bytes;
> > dst_offset += cur_size_in_bytes;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > index c45304f..82cdb8f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > @@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct
> > amdgpu_device *adev)  static void cik_sdma_emit_copy_buffer(struct
> amdgpu_ib *ib,
> >   uint64_t src_offset,
> >   uint64_t dst_offset,
> > - uint32_t byte_count)
> > + uint32_t byte_count,
> > + bool tmz)
> >  {
> > ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
> SDMA_COPY_SUB_OPCODE_LINEAR, 0);
> > ib->ptr[ib->length_dw++] = byte_count; diff --git
> > a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > index a101758..89e8c74 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdm

Re: [PATCH 01/12] amdgpu: add UAPI for creating encrypted buffers

2019-11-19 Thread Marek Olšák
On Tue, Nov 19, 2019 at 8:52 PM Luben Tuikov  wrote:

> On 2019-11-14 10:34 p.m., Aaron Liu wrote:
> > From: Huang Rui 
> >
> > To align the kernel uapi change from Alex:
> >
> > "Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers
> with
> > this flag set will be created with the TMZ bit set in the PTEs or engines
> > accessing them. This is required in order to properly access the data
> from the
> > engines."
> >
> > We will use GEM_CREATE_ENCRYPTED flag for secure buffer allocation.
> >
> > Signed-off-by: Huang Rui 
> > Reviewed-by: Alex Deucher 
> > ---
> >  include/drm/amdgpu_drm.h | 5 +
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> > index 5c28aa7..1a95e37 100644
> > --- a/include/drm/amdgpu_drm.h
> > +++ b/include/drm/amdgpu_drm.h
> > @@ -141,6 +141,11 @@ extern "C" {
> >   * releasing the memory
> >   */
> >  #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE   (1 << 9)
> > +/* Flag that BO will be encrypted and that the TMZ bit should be
> > + * set in the PTEs when mapping this buffer via GPUVM or
> > + * accessing it with various hw blocks
> > + */
> > +#define AMDGPU_GEM_CREATE_ENCRYPTED  (1 << 10)
>
> Style!
> TAB char?!
>
> You have a TAB char between ".._ENCRYPTED" and "(1 << 10)"
> Do NOT add/insert TAB chars instead of spaces to align columns!
> If when you press Tab key a tab is inserted, as opposed to the line
> indented, then DO NOT use this editor.
> The Tab key should "indent according to mode" by inserting TAB chars.
> If the line is already indented, as this one is, then it should do nothing.
>

I disagree with this 100%. Tabs or spaces don't matter here from my
perspective. I also disagree with your language. It's overly impolite.

Marek

Re: [PATCH 06/12] tests/amdgpu: add device handle as input param for exec_cs_helper and write_linear_helper (v4)

2019-11-19 Thread Luben Tuikov
On 2019-11-14 10:34 p.m., Aaron Liu wrote:
> From: Huang Rui 
> 
> This patch adds a device handle as an input param for exec_cs_helper
> and write_linear_helper.
> 
> Because they are needed in security tests.
> 
> v2: fix typo that basic tests should be un-secure.
> v3: refine the function implementation.
> v4: remove amdgpu_cs_ctx_create3 calling.
> 
> Signed-off-by: Huang Rui 
> Signed-off-by: Aaron Liu 
> Reviewed-by: Alex Deucher 
> ---
>  tests/amdgpu/amdgpu_test.h |  4 +++-
>  tests/amdgpu/basic_tests.c | 52 
> +++---
>  2 files changed, 38 insertions(+), 18 deletions(-)
> 
> diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
> index 67be437..16c2da0 100644
> --- a/tests/amdgpu/amdgpu_test.h
> +++ b/tests/amdgpu/amdgpu_test.h
> @@ -263,7 +263,9 @@ CU_BOOL suite_security_tests_enable(void);
>  extern CU_TestInfo security_tests[];
>  
>  extern void
> -amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type,
> +amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
> +   device,
> +   unsigned ip_type,
> bool secure);
>  

Wasn't this already done in patch 5?
If yes, then it shouldn't be done here and only a single header file
should contain the truth.
If no, then why are two header files declaring this extern?
Should be only one.

Regards,
Luben

>  /**
> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
> index 31c9a54..5154812 100644
> --- a/tests/amdgpu/basic_tests.c
> +++ b/tests/amdgpu/basic_tests.c
> @@ -1283,12 +1283,14 @@ static void amdgpu_command_submission_compute(void)
>   * pm4_src, resources, ib_info, and ibs_request
>   * submit command stream described in ibs_request and wait for this IB 
> accomplished
>   */
> -static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
> -unsigned ip_type,
> -int instance, int pm4_dw, uint32_t 
> *pm4_src,
> -int res_cnt, amdgpu_bo_handle *resources,
> -struct amdgpu_cs_ib_info *ib_info,
> -struct amdgpu_cs_request *ibs_request)
> +static void
> +amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
> +amdgpu_context_handle context_handle,
> +unsigned ip_type, int instance, int pm4_dw,
> +uint32_t *pm4_src, int res_cnt,
> +amdgpu_bo_handle *resources,
> +struct amdgpu_cs_ib_info *ib_info,
> +struct amdgpu_cs_request *ibs_request)
>  {
>   int r;
>   uint32_t expired;
> @@ -1361,8 +1363,24 @@ static void 
> amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>   CU_ASSERT_EQUAL(r, 0);
>  }
>  
> -void amdgpu_command_submission_write_linear_helper_with_secure(unsigned 
> ip_type,
> -bool secure)
> +static void
> +amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
> +unsigned ip_type, int instance, int pm4_dw,
> +uint32_t *pm4_src, int res_cnt,
> +amdgpu_bo_handle *resources,
> +struct amdgpu_cs_ib_info *ib_info,
> +struct amdgpu_cs_request *ibs_request)
> +{
> + amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
> +ip_type, instance, pm4_dw, pm4_src,
> +res_cnt, resources, ib_info,
> +ibs_request);
> +}
> +
> +void
> +amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
> +   device, unsigned
> +   ip_type, bool secure)
>  {
>   const int sdma_write_length = 128;
>   const int pm4_dw = 256;
> @@ -1388,13 +1406,13 @@ void 
> amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type,
>   ibs_request = calloc(1, sizeof(*ibs_request));
>   CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
>  
> - r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
> + r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
>   CU_ASSERT_EQUAL(r, 0);
>  
>   for (i = 0; secure && (i < 2); i++)
>   gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
>  
> - r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> + r = amdgpu_cs_ctx_create(device, &context_handle);
>  
>   CU_ASSERT_EQUAL(r, 0);
>  
> @@ -1406,7 +1424,7 @@ void 
> amdgpu_command_submission_write_linear_helper_wit

Re: [PATCH 05/12] tests/amdgpu: expand write linear helper for security (v3)

2019-11-19 Thread Luben Tuikov
On 2019-11-14 10:34 p.m., Aaron Liu wrote:
> From: Huang Rui 
> 
> This patch expand write linear helper for security to submit the command
> with secure context.
> 
> v2: refine the function implementation.
> v3: remove amdgpu_cs_ctx_create3.
> 
> Signed-off-by: Huang Rui 
> Signed-off-by: Aaron Liu 
> Reviewed-by: Alex Deucher 
> ---
>  tests/amdgpu/amdgpu_test.h |  4 
>  tests/amdgpu/basic_tests.c | 15 +--
>  2 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
> index b7f8de2..67be437 100644
> --- a/tests/amdgpu/amdgpu_test.h
> +++ b/tests/amdgpu/amdgpu_test.h
> @@ -262,6 +262,9 @@ CU_BOOL suite_security_tests_enable(void);
>   */
>  extern CU_TestInfo security_tests[];
>  
> +extern void
> +amdgpu_command_submission_write_linear_helper_with_secure(unsigned ip_type,
> +   bool secure);
>  
>  /**
>   * Helper functions
> @@ -452,4 +455,5 @@ static inline bool asic_is_arcturus(uint32_t asic_id)
>   }
>  }
>  
> +
>  #endif  /* #ifdef _AMDGPU_TEST_H_ */
> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
> index a57dcbb..31c9a54 100644
> --- a/tests/amdgpu/basic_tests.c
> +++ b/tests/amdgpu/basic_tests.c
> @@ -71,7 +71,7 @@ static void 
> amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>  int res_cnt, amdgpu_bo_handle *resources,
>  struct amdgpu_cs_ib_info *ib_info,
>  struct amdgpu_cs_request *ibs_request);
> - 
> +
>  CU_TestInfo basic_tests[] = {
>   { "Query Info Test",  amdgpu_query_info_test },
>   { "Userptr Test",  amdgpu_userptr_test },
> @@ -1361,7 +1361,8 @@ static void 
> amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
>   CU_ASSERT_EQUAL(r, 0);
>  }
>  
> -static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
> +void amdgpu_command_submission_write_linear_helper_with_secure(unsigned 
> ip_type,
> +bool secure)
>  {

This is an example of bad naming of a function. And it is also very long. Too long.

Why does the name need to end with "_secure("? Does it mean that the write is
always secure? No! No, it doesn't! If the parameter, in this case the security
state, is parameterized via a function argument, then you don't need to also
add it to the name of the function, as you did.

amdgpu_command_submission_write_linear_helper(unsigned ip_type, bool secure)

is fine for a name. Leave it at that.

Regards,
Luben

>   const int sdma_write_length = 128;
>   const int pm4_dw = 256;
> @@ -1390,7 +1391,11 @@ static void 
> amdgpu_command_submission_write_linear_helper(unsigned ip_type)
>   r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
>   CU_ASSERT_EQUAL(r, 0);
>  
> + for (i = 0; secure && (i < 2); i++)
> + gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
> +
>   r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> +
>   CU_ASSERT_EQUAL(r, 0);
>  
>   /* prepare resource */
> @@ -1469,6 +1474,12 @@ static void 
> amdgpu_command_submission_write_linear_helper(unsigned ip_type)
>   CU_ASSERT_EQUAL(r, 0);
>  }
>  
> +static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
> +{
> + amdgpu_command_submission_write_linear_helper_with_secure(ip_type,
> +   false);
> +}
> +
>  static void amdgpu_command_submission_sdma_write_linear(void)
>  {
>   amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
> 


Re: [PATCH 01/12] amdgpu: add UAPI for creating encrypted buffers

2019-11-19 Thread Luben Tuikov
On 2019-11-14 10:34 p.m., Aaron Liu wrote:
> From: Huang Rui 
> 
> To align the kernel uapi change from Alex:
> 
> "Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers with
> this flag set will be created with the TMZ bit set in the PTEs or engines
> accessing them. This is required in order to properly access the data from the
> engines."
> 
> We will use GEM_CREATE_ENCRYPTED flag for secure buffer allocation.
> 
> Signed-off-by: Huang Rui 
> Reviewed-by: Alex Deucher 
> ---
>  include/drm/amdgpu_drm.h | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> index 5c28aa7..1a95e37 100644
> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
> @@ -141,6 +141,11 @@ extern "C" {
>   * releasing the memory
>   */
>  #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE   (1 << 9)
> +/* Flag that BO will be encrypted and that the TMZ bit should be
> + * set in the PTEs when mapping this buffer via GPUVM or
> + * accessing it with various hw blocks
> + */
> +#define AMDGPU_GEM_CREATE_ENCRYPTED  (1 << 10)

Style!
TAB char?!

You have a TAB char between ".._ENCRYPTED" and "(1 << 10)"
Do NOT add/insert TAB chars instead of spaces to align columns!
If when you press Tab key a tab is inserted, as opposed to the line
indented, then DO NOT use this editor.
The Tab key should "indent according to mode" by inserting TAB chars.
If the line is already indented, as this one is, then it should do nothing.

Don't bunch up definitions together, nor their comments.
You need an empty line after #define and before /* Flag that ... */

In comments please use present tense, not future tense!

The comment can be improved like:

/* Indicates that the BO is encrypted. This will set the TMZ
 * bit in PTEs when mapping this buffer.
 */
#define AMDGPU_GEM_CREATE_ENCRYPTED

Regards,
Luben

>  
>  /* Hybrid specific */
>  /* Flag that the memory allocation should be from top of domain */
> 


Re: Potential BUG: [PATCH 2/8] drm/amdgpu: add a generic fb accessing helper function(v3)

2019-11-19 Thread Luben Tuikov
Hi Iago,

Thank you for finding and reporting this potential double lock.

Yes indeed, I see it--it can indeed happen.

Now, since the primitives used--macros using "amdgpu_mm_(r|w)reg\(.*\)"--in
"amdgpu_device_vram_access()" do use their own register-access spinlocks,
it may be wise to remove the spinlock take/release in
"amdgpu_device_vram_access()".

We'll look into it and possibly submit another patch.

Thanks again.

Regards,
Luben

On 2019-11-16 11:21 a.m., Iago Abal wrote:
> Hi,
> 
> With the help of a static bug finder (EBA - https://github.com/IagoAbal/eba) 
> I have found a potential double lock in Linux Next tag next-20191115, file 
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c.
> 
> This bug seems to be introduced by commit
> e35e2b117f4 ("drm/amdgpu: add a generic fb accessing helper function(v3)").
> 
> The steps to reproduce it would be:
> 
> 1. Start in function `amdgpu_device_vram_access`.
> 2. Enter for-loop `for (last += pos; pos <= last; pos += 4)`.
> 3. First lock: `spin_lock_irqsave(&adev->mmio_idx_lock, flags)`.
> 4. Call to `WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x8000)`.
>    5. Note `#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 
> AMDGPU_REGS_NO_KIQ)`.
>    6. Continue in function `amdgpu_mm_wreg`.
>    7. Take else-branch in the third if-statement.
>    8. Double lock: `spin_lock_irqsave(&adev->mmio_idx_lock, flags)`.
> 
> I think the control flow could reach that second lock, but you may know 
> better.
> 
> Hope it helps!
> 
> -- iago


Re: [PATCH 5/5] drm/amdgpu: enable TMZ bit in FRAME_CONTROL for gfx10

2019-11-19 Thread Luben Tuikov
On 2019-11-18 12:18 a.m., Aaron Liu wrote:
> This patch enables TMZ bit in FRAME_CONTROL for gfx10.
> 
> Signed-off-by: Aaron Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index d6e11ee..8dce067 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4588,7 +4588,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct 
> amdgpu_ring *ring,
>   gfx_v10_0_ring_emit_ce_meta(ring,
>   flags & AMDGPU_IB_PREEMPTED ? true : false);
>  
> - gfx_v10_0_ring_emit_tmz(ring, true, false);
> + gfx_v10_0_ring_emit_tmz(ring, true, trusted);
>  

Do you mean here "true" (the opposite of "false") as opposed to "trusted"?

Regards,
Luben

>   dw2 |= 0x8000; /* set load_enable otherwise this package is just 
> NOPs */
>   if (flags & AMDGPU_HAVE_CTX_SWITCH) {
> 


Re: [PATCH 1/5] drm/amdgpu: expand sdma copy_buffer interface with tmz parameter

2019-11-19 Thread Luben Tuikov
I wonder if we really do need yet another function argument,
thus increasing the argument list, or if the "tmz" boolean
can/is already a property of the job/command/ib/etc., and
if it can indeed be had from the latter entity...?

Regards,
Luben

On 2019-11-18 12:18 a.m., Aaron Liu wrote:
> This patch expands sdma copy_buffer interface with tmz parameter.
> 
> Signed-off-by: Aaron Liu 
> Reviewed-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 4 ++--
>  drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/si_dma.c  | 3 ++-
>  8 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 761ff8b..b313465 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -79,7 +79,8 @@ struct amdgpu_buffer_funcs {
>/* dst addr in bytes */
>uint64_t dst_offset,
>/* number of byte to transfer */
> -  uint32_t byte_count);
> +  uint32_t byte_count,
> +  bool tmz);
>  
>   /* maximum bytes in a single operation */
>   uint32_tfill_max_bytes;
> @@ -97,7 +98,7 @@ struct amdgpu_buffer_funcs {
>uint32_t byte_count);
>  };
>  
> -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) 
> (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
> +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) 
> (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
>  #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) 
> (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
>  
>  struct amdgpu_sdma_instance *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 339088d..c08c15e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2022,7 +2022,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object 
> *bo,
>   dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
>   dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
>   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
> - dst_addr, num_bytes);
> + dst_addr, num_bytes, false);
>  
>   amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>   WARN_ON(job->ibs[0].length_dw > num_dw);
> @@ -2093,7 +2093,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, 
> uint64_t src_offset,
>   uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
>  
>   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
> - dst_offset, cur_size_in_bytes);
> + dst_offset, cur_size_in_bytes, false);
>  
>   src_offset += cur_size_in_bytes;
>   dst_offset += cur_size_in_bytes;
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c45304f..82cdb8f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device 
> *adev)
>  static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
> uint64_t src_offset,
> uint64_t dst_offset,
> -   uint32_t byte_count)
> +   uint32_t byte_count,
> +   bool tmz)
>  {
>   ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, 
> SDMA_COPY_SUB_OPCODE_LINEAR, 0);
>   ib->ptr[ib->length_dw++] = byte_count;
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index a101758..89e8c74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -1200,7 +1200,8 @@ static void sdma_v2_4_set_irq_funcs(struct 
> amdgpu_device *adev)
>  static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
>  uint64_t src_offset,
>  uint64_t dst_offset,
> -uint32_t byte_count)
> +uint32_t byte_count,
> +bool tmz)
>  {
>   ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
>   SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
> diff --git a/drivers/gpu/drm/amd/amd

Re: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Mikita Lipski



On 19/11/2019 16:09, Mikita Lipski wrote:



On 19/11/2019 12:11, Ville Syrjälä wrote:

On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote:

If you're going to make all of them the same, then u64, please.

This is because I'm not sure if calculations require 64-bit at some stage.


If it does then it's already broken. Someone should probably figure out
what's actually needed instead of shooting ducks with an icbm.




Sorry, I made a typo below. It was supposed to be "I don't think it is broken".


I don't think it is not broken, cause I'm currently testing DSC.
The patch I sent earlier simply fixes the error of comparing signed and
unsigned variables.


We can then submit a second patch addressing the issue of using unsigned 
long int instead of u32. Also, since the variables in drm_dsc_config 
structure are all of type u8 and u16, the calculation values shouldn't 
exceed the size of u32.


Thanks



-Original Message-
From: Lipski, Mikita 
Sent: November 19, 2019 10:08 AM
To: Ville Syrjälä ; Lipski, Mikita 

Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
Cornij, Nikola 

Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset



On 19/11/2019 09:56, Ville Syrjälä wrote:

On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:

From: Mikita Lipski 

We shouldn't compare int with unsigned long to find the max value and
since we are not expecting negative value returned from
compute_offset we should make this function return unsigned long so
we can compare the values when computing rc parameters.


Why are there other unsigned longs in dsc parameter computation in the
first place?


I believe it was initially set to be unsigned long for variable 
consistency, when we ported intel_compute_rc_parameters into 
drm_dsc_compute_rc_parameters. But now that I look at it, we can 
actually just set them to u32 or u64, as nothing should exceed that.




Cc: Nikola Cornij 
Cc: Harry Wentland 
Signed-off-by: Mikita Lipski 
---
   drivers/gpu/drm/drm_dsc.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
index 74f3527f567d..ec40604ab6a2 100644
--- a/drivers/gpu/drm/drm_dsc.c
+++ b/drivers/gpu/drm/drm_dsc.c
@@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
drm_dsc_picture_parameter_set *pps_payload,

   }
   EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
-static int compute_offset(struct drm_dsc_config *vdsc_cfg, int 
pixels_per_group,
+static unsigned long compute_offset(struct drm_dsc_config 
*vdsc_cfg, int pixels_per_group,

   int groups_per_line, int grpcnt)
   {
-    int offset = 0;
-    int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);

+    unsigned long offset = 0;
+    unsigned long grpcnt_id = 
DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, pixels_per_group);

   if (grpcnt <= grpcnt_id)
   offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
vdsc_cfg->bits_per_pixel, 16);

--
2.17.1





--
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com






--
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com

Re: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Mikita Lipski



On 19/11/2019 12:11, Ville Syrjälä wrote:

On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote:

If you're going to make all of them the same, then u64, please.

This is because I'm not sure if calculations require 64-bit at some stage.


If it does then it's already broken. Someone should probably figure out
what's actually needed instead of shooting ducks with an icbm.


I don't think it is not broken, cause I'm currently testing DSC.
The patch I sent earlier simply fixes the error of comparing signed and
unsigned variables.


We can then submit a second patch addressing the issue of using unsigned 
long int instead of u32. Also, since the variables in drm_dsc_config 
structure are all of type u8 and u16, the calculation values shouldn't 
exceed the size of u32.


Thanks



-Original Message-
From: Lipski, Mikita 
Sent: November 19, 2019 10:08 AM
To: Ville Syrjälä ; Lipski, Mikita 

Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, Nikola 

Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset



On 19/11/2019 09:56, Ville Syrjälä wrote:

On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:

From: Mikita Lipski 

We shouldn't compare int with unsigned long to find the max value and
since we are not expecting negative value returned from
compute_offset we should make this function return unsigned long so
we can compare the values when computing rc parameters.


Why are there other unsigned longs in dsc parameter computation in the
first place?


I believe it was initially set to be unsigned long for variable consistency, 
when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. 
But now that I look at it, we can actually just set them to u32 or u64, as 
nothing should exceed that.




Cc: Nikola Cornij 
Cc: Harry Wentland 
Signed-off-by: Mikita Lipski 
---
   drivers/gpu/drm/drm_dsc.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
index 74f3527f567d..ec40604ab6a2 100644
--- a/drivers/gpu/drm/drm_dsc.c
+++ b/drivers/gpu/drm/drm_dsc.c
@@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
drm_dsc_picture_parameter_set *pps_payload,
   }
   EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
   
-static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group,

+static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
pixels_per_group,
int groups_per_line, int grpcnt)
   {
-   int offset = 0;
-   int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
+   unsigned long offset = 0;
+   unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
   
   	if (grpcnt <= grpcnt_id)

offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
vdsc_cfg->bits_per_pixel, 16);
--
2.17.1





--
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com




--
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com

[PATCH] drm/amd/display: add default clocks if not able to fetch them

2019-11-19 Thread Alex Deucher
dm_pp_get_clock_levels_by_type needs to add the default clocks
to the powerplay case as well.  This was accidentally dropped.

Fixes: b3ea88fef321de ("drm/amd/powerplay: add get_clock_by_type interface for 
display")
Bug: https://gitlab.freedesktop.org/drm/amd/issues/906
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 229788bee544..a2e1a73f66b8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type(
if (adev->powerplay.pp_funcs && 
adev->powerplay.pp_funcs->get_clock_by_type) {
if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
dc_to_pp_clock_type(clk_type), &pp_clks)) {
-   /* Error in pplib. Provide default values. */
+   /* Error in pplib. Provide default values. */
+   get_default_clock_levels(clk_type, dc_clks);
return true;
}
} else if (adev->smu.ppt_funcs && 
adev->smu.ppt_funcs->get_clock_by_type) {
-- 
2.23.0


Re: [PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault

2019-11-19 Thread Christian König

Am 19.11.19 um 17:45 schrieb Felix Kuehling:

On 2019-11-19 11:37, Alex Sierra wrote:

Only for the debugger use case.

[why]
Avoid endless translation retries after an invalid address access has
been issued to the GPU. Instead, the trap handler is forced to enter by
generating a no-retry-fault.
An s_trap instruction is inserted in the debugger case to let the wave
enter the trap handler and save its context.

[how]
Intentionally using an invalid flag combination (F and P set at the same
time) to trigger a no-retry-fault, after a retry-fault happens. This is
only valid under compute context.

Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a
Signed-off-by: Alex Sierra 


This commit adds some unnecessary empty lines. See inline. With that 
fixed, the series is


Reviewed-by: Felix Kuehling 


I actually like the empty lines; they nicely emphasize that a new block starts.

Maybe note in the code comment that the flags combination is 
intentionally invalid as well.


But either way the series is Reviewed-by: Christian König 
.


Regards,
Christian.



Please also give Christian a chance to review.

Thanks,
  Felix


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index d51ac8771ae0..cd36195ff8be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct 
amdgpu_device *adev, unsigned int pasid,

  flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
  AMDGPU_PTE_SYSTEM;
  -    if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+    if (vm->is_compute_context) {
+    /* Setting PTE flags to trigger a no-retry-fault  */
+    flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+    AMDGPU_PTE_TF;
+    value = 0;
+

Unnecessary blank line.

+    } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
  /* Redirect the access to the dummy page */
  value = adev->dummy_page_addr;
  flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
  AMDGPU_PTE_WRITEABLE;
+

Unnecessary blank line.

  } else {
  /* Let the hw retry silently on the PTE */
  value = 0;


Re: [PATCH v3 12/14] drm/amdgpu: Use mmu_interval_notifier instead of hmm_mirror

2019-11-19 Thread Philip Yang

I tested v3 and it works fine.

Regards,
Philip

On 2019-11-12 3:22 p.m., Jason Gunthorpe wrote:

From: Jason Gunthorpe 

Convert the collision-retry lock around hmm_range_fault to use the one now
provided by the mmu_interval notifier.

Although this driver does not seem to use the collision retry lock that
hmm provides correctly, it can still be converted over to use the
mmu_interval_notifier api instead of hmm_mirror without too much trouble.

This also deletes another place where a driver is associating additional
data (struct amdgpu_mn) with a mmu_struct.

Signed-off-by: Philip Yang 
Reviewed-by: Philip Yang 
Tested-by: Philip Yang 
Signed-off-by: Jason Gunthorpe 
---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   4 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 148 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|  49 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 116 --
  5 files changed, 94 insertions(+), 237 deletions(-)
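
For readers unfamiliar with the pattern, a simplified sketch of the
mmu_interval_notifier collision-retry sequence this series converts to (not
the literal amdgpu code; "example_get_user_pages" is a placeholder for the
hmm_range_fault based page collection, and notifier_lock is the driver mutex
used in the diff below):

static int example_fault_user_pages(struct amdgpu_device *adev,
                                    struct amdgpu_bo *bo)
{
        unsigned long seq;

retry:
        seq = mmu_interval_read_begin(&bo->notifier);

        example_get_user_pages(bo);             /* done outside the lock      */

        mutex_lock(&adev->notifier_lock);
        if (mmu_interval_read_retry(&bo->notifier, seq)) {
                mutex_unlock(&adev->notifier_lock);
                goto retry;                     /* an invalidation raced us   */
        }
        /* Pages stay valid while notifier_lock is held: the invalidate
         * callbacks take the same mutex and call mmu_interval_set_seq().
         */
        mutex_unlock(&adev->notifier_lock);
        return 0;
}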

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 47700302a08b7f..1bcedb9b477dce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,10 @@ static int update_invalid_user_pages(struct 
amdkfd_process_info *process_info,
return ret;
}
  
+		/*

+* FIXME: Cannot ignore the return code, must hold
+* notifier_lock
+*/
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
  
  		/* Mark the BO as valid unless it was invalidated

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82823d9a8ba887..22c989bca7514c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -603,8 +603,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
  
  	amdgpu_bo_list_get_list(p->bo_list, &p->validated);

-   if (p->bo_list->first_userptr != p->bo_list->num_entries)
-   p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
  
  	INIT_LIST_HEAD(&duplicates);

amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -1287,11 +1285,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
  
-	/* No memory allocation is allowed while holding the mn lock.

-* p->mn is hold until amdgpu_cs_submit is finished and fence is added
-* to BOs.
+   /* No memory allocation is allowed while holding the notifier lock.
+* The lock is held until amdgpu_cs_submit is finished and fence is
+* added to BOs.
 */
-   amdgpu_mn_lock(p->mn);
+   mutex_lock(&p->adev->notifier_lock);
  
  	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return

 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1334,13 +1332,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
  
  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);

-   amdgpu_mn_unlock(p->mn);
+   mutex_unlock(&p->adev->notifier_lock);
  
  	return 0;
  
  error_abort:

drm_sched_job_cleanup(&job->base);
-   amdgpu_mn_unlock(p->mn);
+   mutex_unlock(&p->adev->notifier_lock);
  
  error_unlock:

amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 9fe1c31ce17a30..828b5167ff128f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -50,28 +50,6 @@
  #include "amdgpu.h"
  #include "amdgpu_amdkfd.h"
  
-/**

- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
-   if (mn)
-   down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
-   if (mn)
-   up_write(&mn->lock);
-}
-
  /**
   * amdgpu_mn_invalidate_gfx - callback to notify about mm change
   *
@@ -94,6 +72,9 @@ static bool amdgpu_mn_invalidate_gfx(struct 
mmu_interval_notifier *mni,
return false;
  
  	mutex_lock(&adev->notifier_lock);

+
+   mmu_interval_set_seq(mni, cur_seq);
+
r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
  MAX_SCHEDULE_TIMEOUT);
mutex_unlock(&adev->notifier_lock);
@@ -127,6 +108,9 @@ static bool amdgpu_mn_invalidate_hsa(struct 
mmu_interval_notifier *mni,
return false;
  
  	mutex_lock(&adev->notifier_lock);

+
+   mmu_interval_set_seq(mni, cur_seq);
+
amdgpu_amdkfd_evict_user

Re: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Ville Syrjälä
On Tue, Nov 19, 2019 at 04:59:40PM +, Cornij, Nikola wrote:
> If you're going to make all of them the same, then u64, please.
> 
> This is because I'm not sure if calculations require 64-bit at some stage.

If it does then it's already broken. Someone should probably figure out
what's actually needed instead of shooting ducks with an icbm.

> 
> -Original Message-
> From: Lipski, Mikita  
> Sent: November 19, 2019 10:08 AM
> To: Ville Syrjälä ; Lipski, Mikita 
> 
> Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, 
> Nikola 
> Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset
> 
> 
> 
> On 19/11/2019 09:56, Ville Syrjälä wrote:
> > On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:
> >> From: Mikita Lipski 
> >>
> >> We shouldn't compare int with unsigned long to find the max value and 
> >> since we are not expecting negative value returned from 
> >> compute_offset we should make this function return unsigned long so 
> >> we can compare the values when computing rc parameters.
> > 
> > Why are there other unsigned longs in dsc parameter computation in the 
> > first place?
> 
> I believe it was initially set to be unsigned long for variable consistency, 
> when we ported intel_compute_rc_parameters into 
> drm_dsc_compute_rc_parameters. But now that I look at it, we can actually 
> just set them to u32 or u64, as nothing should exceed that.
> > 
> >>
> >> Cc: Nikola Cornij 
> >> Cc: Harry Wentland 
> >> Signed-off-by: Mikita Lipski 
> >> ---
> >>   drivers/gpu/drm/drm_dsc.c | 6 +++---
> >>   1 file changed, 3 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
> >> index 74f3527f567d..ec40604ab6a2 100644
> >> --- a/drivers/gpu/drm/drm_dsc.c
> >> +++ b/drivers/gpu/drm/drm_dsc.c
> >> @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
> >> drm_dsc_picture_parameter_set *pps_payload,
> >>   }
> >>   EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
> >>   
> >> -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int 
> >> pixels_per_group,
> >> +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
> >> pixels_per_group,
> >>int groups_per_line, int grpcnt)
> >>   {
> >> -  int offset = 0;
> >> -  int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
> >> pixels_per_group);
> >> +  unsigned long offset = 0;
> >> +  unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
> >> pixels_per_group);
> >>   
> >>if (grpcnt <= grpcnt_id)
> >>offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
> >> vdsc_cfg->bits_per_pixel, 16);
> >> -- 
> >> 2.17.1
> >>
> > 
> 
> -- 
> Thanks,
> Mikita Lipski
> Software Engineer 2, AMD
> mikita.lip...@amd.com

-- 
Ville Syrjälä
Intel

RE: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Cornij, Nikola
If you're going to make all of them the same, then u64, please.

This is because I'm not sure if calculations require 64-bit at some stage.

-Original Message-
From: Lipski, Mikita  
Sent: November 19, 2019 10:08 AM
To: Ville Syrjälä ; Lipski, Mikita 

Cc: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; Cornij, 
Nikola 
Subject: Re: [PATCH] drm/dsc: Return unsigned long on compute offset



On 19/11/2019 09:56, Ville Syrjälä wrote:
> On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:
>> From: Mikita Lipski 
>>
>> We shouldn't compare int with unsigned long to find the max value and 
>> since we are not expecting negative value returned from 
>> compute_offset we should make this function return unsigned long so 
>> we can compare the values when computing rc parameters.
> 
> Why are there other unsigned longs in dsc parameter computation in the 
> first place?

I believe it was initially set to be unsigned long for variable consistency, 
when we ported intel_compute_rc_parameters into drm_dsc_compute_rc_parameters. 
But now that I look at it, we can actually just set them to u32 or u64, as 
nothing should exceed that.
> 
>>
>> Cc: Nikola Cornij 
>> Cc: Harry Wentland 
>> Signed-off-by: Mikita Lipski 
>> ---
>>   drivers/gpu/drm/drm_dsc.c | 6 +++---
>>   1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
>> index 74f3527f567d..ec40604ab6a2 100644
>> --- a/drivers/gpu/drm/drm_dsc.c
>> +++ b/drivers/gpu/drm/drm_dsc.c
>> @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
>> drm_dsc_picture_parameter_set *pps_payload,
>>   }
>>   EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
>>   
>> -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int 
>> pixels_per_group,
>> +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
>> pixels_per_group,
>>  int groups_per_line, int grpcnt)
>>   {
>> -int offset = 0;
>> -int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
>> pixels_per_group);
>> +unsigned long offset = 0;
>> +unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
>> pixels_per_group);
>>   
>>  if (grpcnt <= grpcnt_id)
>>  offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
>> vdsc_cfg->bits_per_pixel, 16);
>> -- 
>> 2.17.1
>>
> 

-- 
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com

Re: [PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault

2019-11-19 Thread Felix Kuehling

On 2019-11-19 11:37, Alex Sierra wrote:

Only for the debugger use case.

[why]
Avoid endless translation retries after an invalid address access has
been issued to the GPU. Instead, the trap handler is forced to enter by
generating a no-retry-fault.
An s_trap instruction is inserted in the debugger case to let the wave
enter the trap handler and save its context.

[how]
Intentionally using an invalid flag combination (F and P set at the same
time) to trigger a no-retry-fault, after a retry-fault happens. This is
only valid under compute context.

Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a
Signed-off-by: Alex Sierra 


This commit adds some unnecessary empty lines. See inline. With that 
fixed, the series is


Reviewed-by: Felix Kuehling 

Please also give Christian a chance to review.

Thanks,
  Felix


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d51ac8771ae0..cd36195ff8be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
unsigned int pasid,
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
  
-	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {

+   if (vm->is_compute_context) {
+   /* Setting PTE flags to trigger a no-retry-fault  */
+   flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+   AMDGPU_PTE_TF;
+   value = 0;
+

Unnecessary blank line.

+   } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
AMDGPU_PTE_WRITEABLE;
+

Unnecessary blank line.

} else {
/* Let the hw retry silently on the PTE */
value = 0;


[PATCH 1/2] drm/amdgpu: add flag to indicate amdgpu vm context

2019-11-19 Thread Alex Sierra
Flag added to indicate if the amdgpu vm context is used for compute or
graphics.

Change-Id: Ia813037fda2ec2947d73f5c7328388078fbeebe5
Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3c0bd6472a46..d51ac8771ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2713,6 +2713,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
goto error_free_direct;
 
vm->pte_support_ats = false;
+   vm->is_compute_context = false;
 
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2898,6 +2899,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
vm->update_funcs = &amdgpu_vm_sdma_funcs;
dma_fence_put(vm->last_update);
vm->last_update = NULL;
+   vm->is_compute_context = true;
 
if (vm->pasid) {
unsigned long flags;
@@ -2952,6 +2954,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device 
*adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
vm->pasid = 0;
+   vm->is_compute_context = false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 5cb25c1c54e0..76fcf853035c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -301,6 +301,8 @@ struct amdgpu_vm {
struct ttm_lru_bulk_move lru_bulk_move;
/* mark whether can do the bulk move */
boolbulk_moveable;
+   /* Flag to indicate if VM is used for compute */
+   boolis_compute_context;
 };
 
 struct amdgpu_vm_manager {
-- 
2.17.1


[PATCH 2/2] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault

2019-11-19 Thread Alex Sierra
Only for the debugger use case.

[why]
Avoid endless translation retries after an invalid address access has
been issued to the GPU. Instead, the trap handler is forced to enter by
generating a no-retry-fault.
An s_trap instruction is inserted in the debugger case to let the wave
enter the trap handler and save its context.

[how]
Intentionally using an invalid flag combination (F and P set at the same
time) to trigger a no-retry-fault, after a retry-fault happens. This is
only valid under compute context.

Change-Id: I4180c30e2631dc0401cbd6171f8a6776e4733c9a
Signed-off-by: Alex Sierra 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d51ac8771ae0..cd36195ff8be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3202,11 +3202,18 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
unsigned int pasid,
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
 
-   if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+   if (vm->is_compute_context) {
+   /* Setting PTE flags to trigger a no-retry-fault  */
+   flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+   AMDGPU_PTE_TF;
+   value = 0;
+
+   } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
AMDGPU_PTE_WRITEABLE;
+
} else {
/* Let the hw retry silently on the PTE */
value = 0;
-- 
2.17.1


Re: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Mikita Lipski



On 19/11/2019 09:56, Ville Syrjälä wrote:

On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:

From: Mikita Lipski 

We shouldn't compare int with unsigned long to find the max value
and since we are not expecting negative value returned from
compute_offset we should make this function return unsigned long
so we can compare the values when computing rc parameters.


Why are there other unsigned longs in dsc parameter computation
in the first place?


I believe it was initially set to unsigned long for variable consistency
when we ported intel_compute_rc_parameters into
drm_dsc_compute_rc_parameters. But now that I look at it, we can actually
just set them to u32 or u64, as nothing should exceed that.
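
For illustration only (this helper is not in drm_dsc.c; it just shows the
type problem the patch avoids): with the kernel's type-checked min()/max()
macros, mixing an int with an unsigned long triggers a "comparison of
distinct types" warning, so either the types have to be unified, as the
patch does, or the common type has to be spelled out with max_t():

    #include <linux/kernel.h>       /* max(), max_t() */

    static unsigned long pick_larger(int a, unsigned long b)
    {
            /* max(a, b) would warn here because the operand types differ;
             * max_t() makes the comparison type explicit. */
            return max_t(unsigned long, a, b);
    }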




Cc: Nikola Cornij 
Cc: Harry Wentland 
Signed-off-by: Mikita Lipski 
---
  drivers/gpu/drm/drm_dsc.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
index 74f3527f567d..ec40604ab6a2 100644
--- a/drivers/gpu/drm/drm_dsc.c
+++ b/drivers/gpu/drm/drm_dsc.c
@@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
drm_dsc_picture_parameter_set *pps_payload,
  }
  EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
  
-static int compute_offset(struct drm_dsc_config *vdsc_cfg, int pixels_per_group,

+static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
pixels_per_group,
int groups_per_line, int grpcnt)
  {
-   int offset = 0;
-   int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
+   unsigned long offset = 0;
+   unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
  
  	if (grpcnt <= grpcnt_id)

offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
vdsc_cfg->bits_per_pixel, 16);
--
2.17.1

___
dri-devel mailing list
dri-de...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel




--
Thanks,
Mikita Lipski
Software Engineer 2, AMD
mikita.lip...@amd.com
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread Ville Syrjälä
On Tue, Nov 19, 2019 at 09:45:26AM -0500, mikita.lip...@amd.com wrote:
> From: Mikita Lipski 
> 
> We shouldn't compare an int with an unsigned long to find the max value,
> and since we are not expecting a negative value returned from
> compute_offset, we should make this function return unsigned long so we
> can compare the values when computing rc parameters.

Why are there other unsigned longs in dsc parameter computation
in the first place?

> 
> Cc: Nikola Cornij 
> Cc: Harry Wentland 
> Signed-off-by: Mikita Lipski 
> ---
>  drivers/gpu/drm/drm_dsc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
> index 74f3527f567d..ec40604ab6a2 100644
> --- a/drivers/gpu/drm/drm_dsc.c
> +++ b/drivers/gpu/drm/drm_dsc.c
> @@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
> drm_dsc_picture_parameter_set *pps_payload,
>  }
>  EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
>  
> -static int compute_offset(struct drm_dsc_config *vdsc_cfg, int 
> pixels_per_group,
> +static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
> pixels_per_group,
>   int groups_per_line, int grpcnt)
>  {
> - int offset = 0;
> - int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
> pixels_per_group);
> + unsigned long offset = 0;
> + unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
> pixels_per_group);
>  
>   if (grpcnt <= grpcnt_id)
>   offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
> vdsc_cfg->bits_per_pixel, 16);
> -- 
> 2.17.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Ville Syrjälä
Intel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/dsc: Return unsigned long on compute offset

2019-11-19 Thread mikita.lipski
From: Mikita Lipski 

We shouldn't compare an int with an unsigned long to find the max value,
and since we are not expecting a negative value returned from
compute_offset, we should make this function return unsigned long so we
can compare the values when computing rc parameters.

Cc: Nikola Cornij 
Cc: Harry Wentland 
Signed-off-by: Mikita Lipski 
---
 drivers/gpu/drm/drm_dsc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c
index 74f3527f567d..ec40604ab6a2 100644
--- a/drivers/gpu/drm/drm_dsc.c
+++ b/drivers/gpu/drm/drm_dsc.c
@@ -245,11 +245,11 @@ void drm_dsc_pps_payload_pack(struct 
drm_dsc_picture_parameter_set *pps_payload,
 }
 EXPORT_SYMBOL(drm_dsc_pps_payload_pack);
 
-static int compute_offset(struct drm_dsc_config *vdsc_cfg, int 
pixels_per_group,
+static unsigned long compute_offset(struct drm_dsc_config *vdsc_cfg, int 
pixels_per_group,
int groups_per_line, int grpcnt)
 {
-   int offset = 0;
-   int grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
+   unsigned long offset = 0;
+   unsigned long grpcnt_id = DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay, 
pixels_per_group);
 
if (grpcnt <= grpcnt_id)
offset = DIV_ROUND_UP(grpcnt * pixels_per_group * 
vdsc_cfg->bits_per_pixel, 16);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 3/3] drm/amdgpu: invalidate semaphore workaround in gmc9/gmc10

2019-11-19 Thread Christian König

On 19.11.19 at 10:13, Changfeng.Zhu wrote:

From: changzhu 

The GPU may lose the gpuvm invalidate acknowledge state across a
power-gating off cycle. To avoid this issue in gmc9/gmc10 invalidation,
add a semaphore acquire before the invalidation and a semaphore release
after it.

After the semaphore acquire is added before invalidation, the semaphore
register becomes read-only if another process tries to acquire it, so
that process will not be able to release this semaphore, which may cause
a deadlock. If this deadlock happens, it needs a semaphore firmware fix.

Change-Id: I9942a2f451265c1f1038ccfe2f70042c7c8118af
Signed-off-by: changzhu 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 45 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 45 ++
  drivers/gpu/drm/amd/amdgpu/soc15.h |  4 +--
  3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index af2615ba52aa..c47a163b88b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -234,6 +234,24 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
const unsigned eng = 17;
unsigned int i;
  
+	spin_lock(&adev->gmc.invalidate_lock);

+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* a read return value of 1 means semaphore acuqire */
+   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+   if (tmp & 0x1)
+   break;
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout)
+   DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
  
  	/*

@@ -253,6 +271,14 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
udelay(1);
}
  
+	/*

+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+   spin_unlock(&adev->gmc.invalidate_lock);
+
if (i < adev->usec_timeout)
return;
  
@@ -338,6 +364,19 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,

uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
  
+	/*

+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+
+   /* a read return value of 1 means semaphore acuqire */
+   amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
+   DRM_WARN_ONCE("Adding semaphore may cause deadlock and it needs firmware 
fix\n");
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
  lower_32_bits(pd_addr));
  
@@ -348,6 +387,12 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,

hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
  
+	/*

+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 1ae59af7836a..cfef219ced99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -456,6 +456,24 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
  
  	spin_lock(&adev->gmc.invalidate_lock);

+
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+   for (j = 0; j < adev->usec_timeout; j++) {
+   /* a read return value of 1 means semaphore acuqire */
+   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+   if (tmp & 0x1)
+   break;
+   udelay(1);
+   }
+
+   if (j >= adev->usec_timeout)
+   DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
  
  	/*

@@ -

Re: [PATCH 2/3] drm/amdgpu: invalidate semaphore workaround in amdgpu_virt

2019-11-19 Thread Christian König

On 19.11.19 at 10:12, Changfeng.Zhu wrote:

From: changzhu 

The GPU may lose the gpuvm invalidate acknowledge state across a
power-gating off cycle. To avoid this issue in the virt invalidation path,
add a semaphore acquire before the invalidation and a semaphore release
after it.

Change-Id: Ie98304e475166b53eed033462d76423b6b0fc25b
Signed-off-by: changzhu 


Still needs more cleanup, but for now that patch is Reviewed-by: 
Christian König 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 22 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  3 ++-
  3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f04eb1a64271..ee576158545e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -135,7 +135,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, 
uint32_t reg, uint32_t v)
  
  void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,

uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t sem)
  {
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
@@ -144,9 +145,26 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct 
amdgpu_device *adev,
uint32_t seq;
  
  	spin_lock_irqsave(&kiq->ring_lock, flags);

-   amdgpu_ring_alloc(ring, 32);
+   amdgpu_ring_alloc(ring, 60);
+
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+
+   /* a read return value of 1 means semaphore acuqire */
+   amdgpu_ring_emit_reg_wait(ring, sem, 0x1, 0x1);
+
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
+   /*
+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   amdgpu_ring_emit_wreg(ring, sem, 0);
+
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b0b2bdc750df..bda6a2f37dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -295,7 +295,8 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg);
  void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t 
v);
  void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t sem);
  int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
  int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
  int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f25cd97ba5f2..1ae59af7836a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -448,9 +448,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
!adev->in_gpu_reset) {
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
+   uint32_t sem = hub->vm_inv_eng0_sem + eng;
  
  		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,

-   1 << vmid);
+  1 << vmid, sem);
return;
}
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/3] drm/amdgpu: initialize vm_inv_eng0_sem for gfxhub and mmhub

2019-11-19 Thread Christian König

On 19.11.19 at 10:11, Changfeng.Zhu wrote:

From: changzhu 

SW must acquire/release one of the vm_invalidate_eng*_sem registers around
the invalidation req/ack. This way, it can avoid losing the invalidate
acknowledge state across a power-gating off cycle.
To use vm_invalidate_eng*_sem, it needs to be initialized first.

Change-Id: Ic7abf481b08df085c326a98eba4b00d78f33560c
Signed-off-by: changzhu 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  | 1 +
  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 ++
  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 2 ++
  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 2 ++
  drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  | 2 ++
  drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 4 
  6 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 406736a1bd3d..b499a3de8bb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault {
  struct amdgpu_vmhub {
uint32_tctx0_ptb_addr_lo32;
uint32_tctx0_ptb_addr_hi32;
+   uint32_tvm_inv_eng0_sem;
uint32_tvm_inv_eng0_req;
uint32_tvm_inv_eng0_ack;
uint32_tvm_context0_cntl;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 9ec4297e61e5..e91bd7945777 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index b4f32d853ca1..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
 mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 6965e1e6fa9e..28105e4af507 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 945533634711..a7cb185d639a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
 mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 2c5adfe803a2..66efe2f7bd76 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
i * MMHUB_INSTANCE_REGISTER_OFFSET;
+   hub[i]->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0,
+mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+i * MMHUB_INSTANCE_REGISTER_OFFSET;
hub[i]->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0,
 mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +


___

[PATCH 3/3] drm/amdgpu: invalidate semaphore workaround in gmc9/gmc10

2019-11-19 Thread Changfeng.Zhu
From: changzhu 

The GPU may lose the gpuvm invalidate acknowledge state across a
power-gating off cycle. To avoid this issue in gmc9/gmc10 invalidation,
add a semaphore acquire before the invalidation and a semaphore release
after it.

After the semaphore acquire is added before invalidation, the semaphore
register becomes read-only if another process tries to acquire it, so
that process will not be able to release this semaphore, which may cause
a deadlock. If this deadlock happens, it needs a semaphore firmware fix.
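
Condensed sketch of the workaround as applied on the MMIO path in the hunks
below (names taken from the diff; the ack polling loop and error handling
are elided, and "inv_req" stands in for the invalidation request value):

    spin_lock(&adev->gmc.invalidate_lock);

    /* acquire: a read that returns 1 means the semaphore was acquired */
    for (i = 0; i < adev->usec_timeout; i++) {
            if (RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng) & 0x1)
                    break;
            udelay(1);
    }

    WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);  /* invalidate */
    /* ... poll hub->vm_inv_eng0_ack + eng for this vmid ... */

    WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);        /* release: write 0 */

    spin_unlock(&adev->gmc.invalidate_lock);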

Change-Id: I9942a2f451265c1f1038ccfe2f70042c7c8118af
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 45 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 45 ++
 drivers/gpu/drm/amd/amdgpu/soc15.h |  4 +--
 3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index af2615ba52aa..c47a163b88b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -234,6 +234,24 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
const unsigned eng = 17;
unsigned int i;
 
+   spin_lock(&adev->gmc.invalidate_lock);
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* a read return value of 1 means semaphore acuqire */
+   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+   if (tmp & 0x1)
+   break;
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout)
+   DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
/*
@@ -253,6 +271,14 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
udelay(1);
}
 
+   /*
+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+   spin_unlock(&adev->gmc.invalidate_lock);
+
if (i < adev->usec_timeout)
return;
 
@@ -338,6 +364,19 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
 
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+
+   /* a read return value of 1 means semaphore acuqire */
+   amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
+   DRM_WARN_ONCE("Adding semaphore may cause deadlock and it needs 
firmware fix\n");
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
  lower_32_bits(pd_addr));
 
@@ -348,6 +387,12 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
 
+   /*
+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 1ae59af7836a..cfef219ced99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -456,6 +456,24 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
spin_lock(&adev->gmc.invalidate_lock);
+
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+   for (j = 0; j < adev->usec_timeout; j++) {
+   /* a read return value of 1 means semaphore acuqire */
+   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+   if (tmp & 0x1)
+   break;
+   udelay(1);
+   }
+
+   if (j >= adev->usec_timeout)
+   DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
/*
@@ -471,7 +489,15 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdg

[PATCH 2/3] drm/amdgpu: invalidate semaphore workaround in amdgpu_virt

2019-11-19 Thread Changfeng.Zhu
From: changzhu 

The GPU may lose the gpuvm invalidate acknowledge state across a
power-gating off cycle. To avoid this issue in the virt invalidation path,
add a semaphore acquire before the invalidation and a semaphore release
after it.
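
Condensed sketch of the same pattern on the KIQ ring path changed below
(taken from the hunk; "sem", "reg0"/"reg1", "ref" and "mask" are the
function arguments):

    /* acquire: wait until a read of the semaphore register returns 1 */
    amdgpu_ring_emit_reg_wait(ring, sem, 0x1, 0x1);

    /* issue the invalidation request and wait for the acknowledge */
    amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask);

    /* release: write 0 to the semaphore register */
    amdgpu_ring_emit_wreg(ring, sem, 0);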

Change-Id: Ie98304e475166b53eed033462d76423b6b0fc25b
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 22 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  3 ++-
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f04eb1a64271..ee576158545e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -135,7 +135,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, 
uint32_t reg, uint32_t v)
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
-   uint32_t ref, uint32_t mask)
+   uint32_t ref, uint32_t mask,
+   uint32_t sem)
 {
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
@@ -144,9 +145,26 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct 
amdgpu_device *adev,
uint32_t seq;
 
spin_lock_irqsave(&kiq->ring_lock, flags);
-   amdgpu_ring_alloc(ring, 32);
+   amdgpu_ring_alloc(ring, 60);
+
+   /*
+* It may lose gpuvm invalidate acknowldege state across power-gating
+* off cycle, add semaphore acquire before invalidation and semaphore
+* release after invalidation to avoid entering power gated state
+* to WA the Issue
+*/
+
+   /* a read return value of 1 means semaphore acuqire */
+   amdgpu_ring_emit_reg_wait(ring, sem, 0x1, 0x1);
+
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
+   /*
+* add semaphore release after invalidation,
+* write with 0 means semaphore release
+*/
+   amdgpu_ring_emit_wreg(ring, sem, 0);
+
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b0b2bdc750df..bda6a2f37dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -295,7 +295,8 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, 
uint32_t reg);
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t 
v);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
-   uint32_t ref, uint32_t mask);
+   uint32_t ref, uint32_t mask,
+   uint32_t sem);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f25cd97ba5f2..1ae59af7836a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -448,9 +448,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
!adev->in_gpu_reset) {
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
+   uint32_t sem = hub->vm_inv_eng0_sem + eng;
 
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
-   1 << vmid);
+  1 << vmid, sem);
return;
}
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdgpu: initialize vm_inv_eng0_sem for gfxhub and mmhub

2019-11-19 Thread Changfeng.Zhu
From: changzhu 

SW must acquire/release one of the vm_invalidate_eng*_sem registers around
the invalidation req/ack. This way, it can avoid losing the invalidate
acknowledge state across a power-gating off cycle.
To use vm_invalidate_eng*_sem, it needs to be initialized first.

Change-Id: Ic7abf481b08df085c326a98eba4b00d78f33560c
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  | 1 +
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 4 
 6 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 406736a1bd3d..b499a3de8bb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault {
 struct amdgpu_vmhub {
uint32_tctx0_ptb_addr_lo32;
uint32_tctx0_ptb_addr_hi32;
+   uint32_tvm_inv_eng0_sem;
uint32_tvm_inv_eng0_req;
uint32_tvm_inv_eng0_ack;
uint32_tvm_context0_cntl;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 9ec4297e61e5..e91bd7945777 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index b4f32d853ca1..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
 mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 6965e1e6fa9e..28105e4af507 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 945533634711..a7cb185d639a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
 mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+   hub->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 2c5adfe803a2..66efe2f7bd76 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
i * MMHUB_INSTANCE_REGISTER_OFFSET;
+   hub[i]->vm_inv_eng0_sem =
+   SOC15_REG_OFFSET(MMHUB, 0,
+mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+i * MMHUB_INSTANCE_REGISTER_OFFSET;
hub[i]->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0,
 mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH v2] drm/scheduler: Avoid accessing freed bad job.

2019-11-19 Thread Deng, Emily
Tested-by: Emily Deng 

>-Original Message-
>From: Andrey Grodzovsky 
>Sent: Tuesday, November 19, 2019 1:52 AM
>Cc: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig,
>Christian ; Deng, Emily
>; Grodzovsky, Andrey
>
>Subject: [PATCH v2] drm/scheduler: Avoid accessing freed bad job.
>
>Problem:
>Due to a race between drm_sched_cleanup_jobs in the sched thread and
>drm_sched_job_timedout in the timeout work, there is a possibility that the
>bad job has already been freed while it is still being accessed from the
>timeout thread.
>
>Fix:
>Instead of just peeking at the bad job in the mirror list, remove it from
>the list under the lock and put it back later, when we are guaranteed that
>no race with the main sched thread is possible, i.e. after the thread is
>parked.
>
>v2: Lock around processing ring_mirror_list in drm_sched_cleanup_jobs.
>
>Signed-off-by: Andrey Grodzovsky 
>---
> drivers/gpu/drm/scheduler/sched_main.c | 44
>+-
> 1 file changed, 38 insertions(+), 6 deletions(-)
>
>diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>b/drivers/gpu/drm/scheduler/sched_main.c
>index 80ddbdf..b05b210 100644
>--- a/drivers/gpu/drm/scheduler/sched_main.c
>+++ b/drivers/gpu/drm/scheduler/sched_main.c
>@@ -287,10 +287,24 @@ static void drm_sched_job_timedout(struct
>work_struct *work)
>   unsigned long flags;
>
>   sched = container_of(work, struct drm_gpu_scheduler,
>work_tdr.work);
>+
>+  /*
>+   * Protects against concurrent deletion in drm_sched_cleanup_jobs
>that
>+   * is already in progress.
>+   */
>+  spin_lock_irqsave(&sched->job_list_lock, flags);
>   job = list_first_entry_or_null(&sched->ring_mirror_list,
>  struct drm_sched_job, node);
>
>   if (job) {
>+  /*
>+   * Remove the bad job so it cannot be freed by already in
>progress
>+   * drm_sched_cleanup_jobs. It will be reinsrted back after
>sched->thread
>+   * is parked at which point it's safe.
>+   */
>+  list_del_init(&job->node);
>+  spin_unlock_irqrestore(&sched->job_list_lock, flags);
>+
>   job->sched->ops->timedout_job(job);
>
>   /*
>@@ -302,6 +316,8 @@ static void drm_sched_job_timedout(struct
>work_struct *work)
>   sched->free_guilty = false;
>   }
>   }
>+  else
>+  spin_unlock_irqrestore(&sched->job_list_lock, flags);
>
>   spin_lock_irqsave(&sched->job_list_lock, flags);
>   drm_sched_start_timeout(sched);
>@@ -373,6 +389,19 @@ void drm_sched_stop(struct drm_gpu_scheduler
>*sched, struct drm_sched_job *bad)
>   kthread_park(sched->thread);
>
>   /*
>+   * Reinsert back the bad job here - now it's safe as
>drm_sched_cleanup_jobs
>+   * cannot race against us and release the bad job at this point - we
>parked
>+   * (waited for) any in progress (earlier) cleanups and any later ones 
>will
>+   * bail out due to sched->thread being parked.
>+   */
>+  if (bad && bad->sched == sched)
>+  /*
>+   * Add at the head of the queue to reflect it was the earliest
>+   * job extracted.
>+   */
>+  list_add(&bad->node, &sched->ring_mirror_list);
>+
>+  /*
>* Iterate the job list from later to  earlier one and either deactive
>* their HW callbacks or remove them from mirror list if they already
>* signaled.
>@@ -656,16 +685,19 @@ static void drm_sched_cleanup_jobs(struct
>drm_gpu_scheduler *sched)
>   __kthread_should_park(sched->thread))
>   return;
>
>-
>-  while (!list_empty(&sched->ring_mirror_list)) {
>+  /* See drm_sched_job_timedout for why the locking is here */
>+  while (true) {
>   struct drm_sched_job *job;
>
>-  job = list_first_entry(&sched->ring_mirror_list,
>- struct drm_sched_job, node);
>-  if (!dma_fence_is_signaled(&job->s_fence->finished))
>+  spin_lock_irqsave(&sched->job_list_lock, flags);
>+  job = list_first_entry_or_null(&sched->ring_mirror_list,
>+ struct drm_sched_job, node);
>+
>+  if (!job || !dma_fence_is_signaled(&job->s_fence->finished)) {
>+  spin_unlock_irqrestore(&sched->job_list_lock, flags);
>   break;
>+  }
>
>-  spin_lock_irqsave(&sched->job_list_lock, flags);
>   /* remove job from ring_mirror_list */
>   list_del_init(&job->node);
>   spin_unlock_irqrestore(&sched->job_list_lock, flags);
>--
>2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v2 1/1] drm: Prefer pcie_capability_read_word()

2019-11-19 Thread Frederick Lawler

Hi,

Alex Deucher wrote on 11/18/19 11:42 AM:

On Mon, Nov 18, 2019 at 3:37 AM Frederick Lawler  wrote:


Commit 8c0d3a02c130 ("PCI: Add accessors for PCI Express Capability")
added accessors for the PCI Express Capability so that drivers didn't
need to be aware of differences between v1 and v2 of the PCI
Express Capability.

Replace pci_read_config_word() and pci_write_config_word() calls with
pcie_capability_read_word() and pcie_capability_write_word().
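
As a minimal illustration of the new accessor (not code from the patch; the
real call sites are in the diff below), reading LNKCTL no longer needs the
capability offset that pci_pcie_cap() used to provide:

    #include <linux/pci.h>

    /* Hypothetical helper: read the Link Control register of a PCIe device. */
    static int example_read_lnkctl(struct pci_dev *pdev, u16 *lnkctl)
    {
            if (!pci_is_pcie(pdev))
                    return -ENODEV;

            /* pcie_capability_read_word() locates the PCIe capability itself,
             * so no bridge_pos/gpu_pos bookkeeping is required. */
            return pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, lnkctl);
    }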

Signed-off-by: Frederick Lawler 

---
V2
- Squash both drm commits into one
- Rebase ontop of d46eac1e658b
---
  drivers/gpu/drm/amd/amdgpu/cik.c | 63 ---
  drivers/gpu/drm/amd/amdgpu/si.c  | 71 +++
  drivers/gpu/drm/radeon/cik.c | 70 ++
  drivers/gpu/drm/radeon/si.c  | 73 


Can you split this into two patches?  One for amdgpu and one for radeon?

Thanks!


Sure thing! I also realize I didn't say where the magical commit ref 
came from. It came from Bjorn's pci/misc tree. I'll get that out to you 
Wednesday.




Alex


  4 files changed, 174 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 3067bb874032..f369e3408ed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1384,7 +1384,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, 
u32 evclk, u32 ecclk)
  static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
  {
 struct pci_dev *root = adev->pdev->bus->self;
-   int bridge_pos, gpu_pos;
 u32 speed_cntl, current_data_rate;
 int i;
 u16 tmp16;
@@ -1419,12 +1418,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
 DRM_INFO("enabling PCIE gen 2 link speeds, disable with 
amdgpu.pcie_gen2=0\n");
 }

-   bridge_pos = pci_pcie_cap(root);
-   if (!bridge_pos)
-   return;
-
-   gpu_pos = pci_pcie_cap(adev->pdev);
-   if (!gpu_pos)
+   if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
 return;

 if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1434,14 +1428,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)
 u16 bridge_cfg2, gpu_cfg2;
 u32 max_lw, current_lw, tmp;

-   pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, 
&bridge_cfg);
-   pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, 
&gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);

 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
-   pci_write_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, tmp16);
+   pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+  tmp16);

 tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
 max_lw = (tmp & 
PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1465,15 +1462,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device 
*adev)

 for (i = 0; i < 10; i++) {
 /* check status */
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_DEVSTA, &tmp16);
+   pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
 break;

-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL, &bridge_cfg);
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL, &gpu_cfg);
+   pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+   pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);

-   pci_read_config_word(root, bridge_pos + 
PCI_EXP_LNKCTL2, &bridge_cfg2);
-   pci_read_config_word(adev->pdev, gpu_pos + 
PCI_EXP_LNKCTL2, &gpu_cfg2);
+   pcie