[PATCH 25/87] drm/amd/display: add hdmi2.1 dsc pps packet programming

2019-07-15 Thread sunpeng.li
From: Dmytro Laktyushkin 

This change adds EMP packet programming for enabling dsc with
hdmi. The packets are structured according to VESA HDMI 2.1x
r2 spec, section 10.10.2.2.

Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Charlene Liu 
Acked-by: Leo Li 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c  | 14 +++---
 drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c | 13 ++---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c |  3 ++-
 drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h|  1 +
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 02a18f6aa009..1e051e953610 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2769,10 +2769,10 @@ void core_link_enable_stream(
allocate_mst_payload(pipe_ctx);
 
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-   if (pipe_ctx->stream->timing.flags.DSC &&
-   (dc_is_dp_signal(pipe_ctx->stream->signal) ||
-   
dc_is_virtual_signal(pipe_ctx->stream->signal))) {
-   dp_set_dsc_enable(pipe_ctx, true);
+   if (pipe_ctx->stream->timing.flags.DSC) {
+   if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
+   
dc_is_virtual_signal(pipe_ctx->stream->signal))
+   dp_set_dsc_enable(pipe_ctx, true);
pipe_ctx->stream_res.tg->funcs->wait_for_state(
pipe_ctx->stream_res.tg,
CRTC_STATE_VBLANK);
@@ -2833,9 +2833,9 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, 
int option)
 
disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-   if (pipe_ctx->stream->timing.flags.DSC &&
-   dc_is_dp_signal(pipe_ctx->stream->signal)) {
-   dp_set_dsc_enable(pipe_ctx, false);
+   if (pipe_ctx->stream->timing.flags.DSC) {
+   if (dc_is_dp_signal(pipe_ctx->stream->signal))
+   dp_set_dsc_enable(pipe_ctx, false);
}
 #endif
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 211fadefe2f5..46257f0fcbe7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -396,7 +396,7 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, 
bool enable)
 
 /* This has to be done after DSC was enabled on RX first, i.e. after 
dp_enable_dsc_on_rx() had been called
  */
-static void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
+void set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
 {
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
struct dc *core_dc = pipe_ctx->stream->ctx->dc;
@@ -435,7 +435,7 @@ static void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, 
bool enable)
 
dsc_optc_config_log(dsc, &dsc_optc_cfg);
/* Enable DSC in encoder */
-   if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment) && 
pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config)
+   if (dc_is_dp_signal(stream->signal) && 
!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment))

pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(pipe_ctx->stream_res.stream_enc,

optc_dsc_mode,

dsc_optc_cfg.bytes_per_pixel,
@@ -454,11 +454,10 @@ static void dp_set_dsc_on_stream(struct pipe_ctx 
*pipe_ctx, bool enable)
OPTC_DSC_DISABLED, 0, 0);
 
/* disable DSC in stream encoder */
-   if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+   if (dc_is_dp_signal(stream->signal) && 
!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment))

pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(
pipe_ctx->stream_res.stream_enc,
OPTC_DSC_DISABLED, 0, 0, NULL);
-   }
 
/* disable DSC block */

pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
@@ -479,12 +478,12 @@ bool dp_set_dsc_enable(struct pipe_ctx *pipe_ctx, bool 
enable)
 
if (enable) {
if (dp_set_dsc_on_rx(pipe_ctx, true)) {
-   dp_set_dsc_on_stream(pipe_ctx, true);
+   set_dsc_on_stream(pipe_ctx, true);
result = true;
}
} else {
dp_set_dsc_on_rx(pipe_ctx, false);
-  

[PATCH 19/87] drm/amd/display: Incorrect Read Interval Time For CR Sequence

2019-07-15 Thread sunpeng.li
From: David Galiffi 

[WHY]
TRAINING_AUX_RD_INTERVAL (DPCD 000Eh) modifies the read interval
for the EQ training sequence. CR read interval should remain 100 us.
Currently, the CR interval is also being modified.

[HOW]
lt_settings->cr_pattern_time should always be 100 us.

Signed-off-by: David Galiffi 
Reviewed-by: Tony Cheng 
Acked-by: Leo Li 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index fca1bfc901b6..4442e7b1e5b5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1035,7 +1035,7 @@ static void initialize_training_settings(
if (link->preferred_training_settings.cr_pattern_time != NULL)
lt_settings->cr_pattern_time = 
*link->preferred_training_settings.cr_pattern_time;
else
-   lt_settings->cr_pattern_time = 
get_training_aux_rd_interval(link, 100);
+   lt_settings->cr_pattern_time = 100;
 
if (link->preferred_training_settings.eq_pattern_time != NULL)
lt_settings->eq_pattern_time = 
*link->preferred_training_settings.eq_pattern_time;
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 05/87] drm/amd/display: fix dsc disable

2019-07-15 Thread sunpeng.li
From: Dmytro Laktyushkin 

A regression caused dsc to never get disabled in certain situations.

Signed-off-by: Dmytro Laktyushkin 
Reviewed-by: Nikola Cornij 
Acked-by: Leo Li 
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 6925d25d2457..45f9dad95644 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1715,8 +1715,11 @@ static void dcn20_reset_back_end_for_pipe(
else if (pipe_ctx->stream_res.audio) {
dc->hwss.disable_audio_stream(pipe_ctx, 
FREE_ACQUIRED_RESOURCE);
}
-
}
+#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+   else if (pipe_ctx->stream_res.dsc)
+   dp_set_dsc_enable(pipe_ctx, false);
+#endif
 
/* by upper caller loop, parent pipe: pipe0, will be reset last.
 * back end share by all pipes and will be disable only when disable
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 02/87] drm/amd/display: Add ability to set preferred link training parameters.

2019-07-15 Thread sunpeng.li
From: David Galiffi 

[WHY]
To add support for OS requirement to set preferred link training
parameters.

[HOW]
Create new structure of dp link training overrides. During link training
processes, these values should be used instead of the default training
parameters.

Signed-off-by: David Galiffi 
Reviewed-by: Tony Cheng 
Acked-by: Anthony Koo 
Acked-by: Leo Li 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |  46 ++-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 337 +-
 .../drm/amd/display/dc/core/dc_link_hwss.c|  28 +-
 drivers/gpu/drm/amd/display/dc/dc_dp_types.h  |  21 ++
 drivers/gpu/drm/amd/display/dc/dc_link.h  |  11 +
 .../gpu/drm/amd/display/dc/inc/link_hwss.h|   2 +-
 .../amd/display/include/link_service_types.h  |  17 +-
 7 files changed, 338 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 6167e1cb0b48..0918c334f0e2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1187,6 +1187,9 @@ static bool construct(
link->ctx = dc_ctx;
link->link_index = init_params->link_index;
 
+   memset(&link->preferred_training_settings, 0, sizeof(struct 
dc_link_training_overrides));
+   memset(&link->preferred_link_setting, 0, sizeof(struct 
dc_link_settings));
+
link->link_id = bios->funcs->get_connector_id(bios, 
init_params->connector_index);
 
if (link->link_id.type != OBJECT_TYPE_CONNECTOR) {
@@ -1465,6 +1468,9 @@ static enum dc_status enable_link_dp(
struct dc_link *link = stream->link;
struct dc_link_settings link_settings = {0};
enum dp_panel_mode panel_mode;
+#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+   bool fec_enable;
+#endif
 
/* get link settings for video mode timing */
decide_link_settings(stream, &link_settings);
@@ -1509,10 +1515,20 @@ static enum dc_status enable_link_dp(
skip_video_pattern = false;
 
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-   dp_set_fec_ready(link, true);
+   if (link->preferred_training_settings.fec_enable != NULL)
+   fec_enable = *link->preferred_training_settings.fec_enable;
+   else
+   fec_enable = true;
+
+   dp_set_fec_ready(link, fec_enable);
 #endif
 
-   if (perform_link_training_with_retries(
+   if (link->aux_access_disabled) {
+   dc_link_dp_perform_link_training_skip_aux(link, &link_settings);
+
+   link->cur_link_settings = link_settings;
+   status = DC_OK;
+   } else if (perform_link_training_with_retries(
link,
&link_settings,
skip_video_pattern,
@@ -1524,7 +1540,7 @@ static enum dc_status enable_link_dp(
status = DC_FAIL_DP_LINK_TRAINING;
 
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
-   dp_set_fec_enable(link, true);
+   dp_set_fec_enable(link, fec_enable);
 #endif
return status;
 }
@@ -3012,6 +3028,29 @@ void dc_link_set_preferred_link_settings(struct dc *dc,
dp_retrain_link_dp_test(link, &store_settings, false);
 }
 
+void dc_link_set_preferred_training_settings(struct dc *dc,
+struct dc_link_settings 
*link_setting,
+struct 
dc_link_training_overrides *lt_overrides,
+struct dc_link *link,
+bool skip_immediate_retrain)
+{
+   if (lt_overrides != NULL)
+   link->preferred_training_settings = *lt_overrides;
+   else
+   memset(&link->preferred_training_settings, 0, 
sizeof(link->preferred_training_settings));
+
+   if (link_setting != NULL) {
+   link->preferred_link_setting = *link_setting;
+   } else {
+   link->preferred_link_setting.lane_count = LANE_COUNT_UNKNOWN;
+   link->preferred_link_setting.link_rate = LINK_RATE_UNKNOWN;
+   }
+
+   /* Retrain now, or wait until next stream update to apply */
+   if (skip_immediate_retrain == false)
+   dc_link_set_preferred_link_settings(dc, 
&link->preferred_link_setting, link);
+}
+
 void dc_link_enable_hpd(const struct dc_link *link)
 {
dc_link_dp_enable_hpd(link);
@@ -3022,7 +3061,6 @@ void dc_link_disable_hpd(const struct dc_link *link)
dc_link_dp_disable_hpd(link);
 }
 
-
 void dc_link_set_test_pattern(struct dc_link *link,
  enum dp_test_pattern test_pattern,
  const struct link_training_settings 
*p_link_settings,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 056be4c34a98..3f8a8f61cd76 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc

[PATCH 03/87] drm/amd/display: 3.2.36

2019-07-15 Thread sunpeng.li
From: Aric Cyr 

Signed-off-by: Aric Cyr 
Acked-by: Leo Li 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h 
b/drivers/gpu/drm/amd/display/dc/dc.h
index e513028faefa..2a7f25d372e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.35"
+#define DC_VER "3.2.36"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/1] drm/amdgpu: Optimize KFD page table reservation

2019-07-15 Thread Kuehling, Felix
Be less pessimistic about estimated page table use for KFD. Most
allocations use 2MB pages and therefore need less VRAM for page
tables. This allows more VRAM to be used for applications especially
on large systems with many GPUs and hundreds of GB of system memory.

Example: 8 GPUs with 32GB VRAM each + 256GB system memory = 512GB
Old page table reservation per GPU:  1GB
New page table reservation per GPU: 32MB

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2db6e498c069..dbfa3f308e0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -112,11 +112,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
 {
+   uint64_t reserved_for_pt =
+   ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
-   uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
 
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Fix unaligned memory copies

2019-07-15 Thread Christian König

Am 15.07.19 um 18:12 schrieb Alex Deucher:

On Sat, Jul 13, 2019 at 2:43 AM Kuehling, Felix  wrote:

When starting a new mm_node, the page_offset becomes 0.

Signed-off-by: Felix Kuehling 

Reviewed-by: Alex Deucher 


Reviewed-by: Christian König 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 37d9a3b09946..d0f6c23ec7cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -386,6 +386,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
  src->mem);
 src_node_size = (src_mm->size << PAGE_SHIFT);
+   src_page_offset = 0;
 } else {
 src_node_start += cur_size;
 src_page_offset = src_node_start & (PAGE_SIZE - 1);
@@ -395,6 +396,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
  dst->mem);
 dst_node_size = (dst_mm->size << PAGE_SHIFT);
+   dst_page_offset = 0;
 } else {
 dst_node_start += cur_size;
 dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-15 Thread Jason Gunthorpe
On Mon, Jul 15, 2019 at 06:01:29PM +0200, Andrey Konovalov wrote:
> On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas  
> wrote:
> >
> > On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > > This patch is a part of a series that extends kernel ABI to allow to pass
> > > tagged user pointers (with the top byte set to something else other than
> > > 0x00) as syscall arguments.
> > >
> > > mlx4_get_umem_mr() uses provided user pointers for vma lookups, which can
> > > only by done with untagged pointers.
> > >
> > > Untag user pointers in this function.
> > >
> > > Signed-off-by: Andrey Konovalov 
> > >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> > >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > Acked-by: Catalin Marinas 
> >
> > This patch also needs an ack from the infiniband maintainers (Jason).
> 
> Hi Jason,
> 
> Could you take a look and give your acked-by?

Oh, I think I did this a long time ago. Still looks OK. You will send
it?

Reviewed-by: Jason Gunthorpe 

Jason


Re: HMM related use-after-free with amdgpu

2019-07-15 Thread Jason Gunthorpe
On Mon, Jul 15, 2019 at 06:51:06PM +0200, Michel Dänzer wrote:
> 
> With a KASAN enabled kernel built from amd-staging-drm-next, the
> attached use-after-free is pretty reliably detected during a piglit gpu run.

Does this branch you are testing have the hmm.git merged? I think from
the name it does not?

Use after free's of this nature were something that was fixed in
hmm.git..

I don't see an obvious way you can hit something like this with the
new code arrangement..

> P.S. With my standard kernels without KASAN (currently 5.2.y + drm-next
> changes for 5.3), I'm having trouble lately completing a piglit run,
> running into various issues which look like memory corruption, so might
> be related.

I'm skeptical that the AMDGPU implementation of the locking around the
hmm_range & mirror is working, it doesn'r follow the perscribed
pattern at least.

> Jul 15 18:09:29 kaveri kernel: [  560.388751][T12568] 
> ==
> Jul 15 18:09:29 kaveri kernel: [  560.389063][T12568] BUG: KASAN: 
> use-after-free in __mmu_notifier_release+0x286/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389068][T12568] Read of size 8 at addr 
> 88835e1c7cb0 by task amd_pinned_memo/12568
> Jul 15 18:09:29 kaveri kernel: [  560.389071][T12568] 
> Jul 15 18:09:29 kaveri kernel: [  560.389077][T12568] CPU: 9 PID: 12568 Comm: 
> amd_pinned_memo Tainted: G   OE 5.2.0-rc1-00811-g2ad5a7d31bdf #125
> Jul 15 18:09:29 kaveri kernel: [  560.389080][T12568] Hardware name: 
> Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 
> 09/13/2017
> Jul 15 18:09:29 kaveri kernel: [  560.389084][T12568] Call Trace:
> Jul 15 18:09:29 kaveri kernel: [  560.389091][T12568]  dump_stack+0x7c/0xc0
> Jul 15 18:09:29 kaveri kernel: [  560.389097][T12568]  ? 
> __mmu_notifier_release+0x286/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389101][T12568]  
> print_address_description+0x65/0x22e
> Jul 15 18:09:29 kaveri kernel: [  560.389106][T12568]  ? 
> __mmu_notifier_release+0x286/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389110][T12568]  ? 
> __mmu_notifier_release+0x286/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389115][T12568]  
> __kasan_report.cold.3+0x1a/0x3d
> Jul 15 18:09:29 kaveri kernel: [  560.389122][T12568]  ? 
> __mmu_notifier_release+0x286/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389128][T12568]  kasan_report+0xe/0x20
> Jul 15 18:09:29 kaveri kernel: [  560.389132][T12568]  
> __mmu_notifier_release+0x286/0x3e0

So we are iterating over the mn list and touched free'd memory

> Jul 15 18:09:29 kaveri kernel: [  560.389309][T12568] Allocated by task 12568:
> Jul 15 18:09:29 kaveri kernel: [  560.389314][T12568]  save_stack+0x19/0x80
> Jul 15 18:09:29 kaveri kernel: [  560.389318][T12568]  
> __kasan_kmalloc.constprop.8+0xc1/0xd0
> Jul 15 18:09:29 kaveri kernel: [  560.389323][T12568]  
> hmm_get_or_create+0x8f/0x3f0

The memory is probably a struct hmm

> Jul 15 18:09:29 kaveri kernel: [  560.389857][T12568] Freed by task 12568:
> Jul 15 18:09:29 kaveri kernel: [  560.389860][T12568]  save_stack+0x19/0x80
> Jul 15 18:09:29 kaveri kernel: [  560.389864][T12568]  
> __kasan_slab_free+0x125/0x170
> Jul 15 18:09:29 kaveri kernel: [  560.389867][T12568]  kfree+0xe2/0x290
> Jul 15 18:09:29 kaveri kernel: [  560.389871][T12568]  
> __mmu_notifier_release+0xef/0x3e0
> Jul 15 18:09:29 kaveri kernel: [  560.389875][T12568]  exit_mmap+0x93/0x400

And the free was also done in notifier_release (presumably the
backtrace is corrupt and this is really in the old hmm_release ->
hmm_put -> hmm_free -> kfree call chain)

Which was not OK, as __mmu_notifier_release doesn't use a 'safe' hlist
iterator, so the release callback can never trigger kfree of a struct
mmu_notifier.

The new hmm.git code does not call kfree from release, it schedules
that through a SRCU which won't run until __mmu_notifier_release
returns, by definition. 

So should be fixed.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

HMM related use-after-free with amdgpu

2019-07-15 Thread Michel Dänzer

With a KASAN enabled kernel built from amd-staging-drm-next, the
attached use-after-free is pretty reliably detected during a piglit gpu run.

Any ideas?


P.S. With my standard kernels without KASAN (currently 5.2.y + drm-next
changes for 5.3), I'm having trouble lately completing a piglit run,
running into various issues which look like memory corruption, so might
be related.

-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
Jul 15 18:09:29 kaveri kernel: [  560.388751][T12568] ==
Jul 15 18:09:29 kaveri kernel: [  560.389063][T12568] BUG: KASAN: use-after-free in __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389068][T12568] Read of size 8 at addr 88835e1c7cb0 by task amd_pinned_memo/12568
Jul 15 18:09:29 kaveri kernel: [  560.389071][T12568] 
Jul 15 18:09:29 kaveri kernel: [  560.389077][T12568] CPU: 9 PID: 12568 Comm: amd_pinned_memo Tainted: G   OE 5.2.0-rc1-00811-g2ad5a7d31bdf #125
Jul 15 18:09:29 kaveri kernel: [  560.389080][T12568] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Jul 15 18:09:29 kaveri kernel: [  560.389084][T12568] Call Trace:
Jul 15 18:09:29 kaveri kernel: [  560.389091][T12568]  dump_stack+0x7c/0xc0
Jul 15 18:09:29 kaveri kernel: [  560.389097][T12568]  ? __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389101][T12568]  print_address_description+0x65/0x22e
Jul 15 18:09:29 kaveri kernel: [  560.389106][T12568]  ? __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389110][T12568]  ? __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389115][T12568]  __kasan_report.cold.3+0x1a/0x3d
Jul 15 18:09:29 kaveri kernel: [  560.389122][T12568]  ? __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389128][T12568]  kasan_report+0xe/0x20
Jul 15 18:09:29 kaveri kernel: [  560.389132][T12568]  __mmu_notifier_release+0x286/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389142][T12568]  exit_mmap+0x93/0x400
Jul 15 18:09:29 kaveri kernel: [  560.389146][T12568]  ? quarantine_put+0xb7/0x150
Jul 15 18:09:29 kaveri kernel: [  560.389151][T12568]  ? do_munmap+0x10/0x10
Jul 15 18:09:29 kaveri kernel: [  560.389156][T12568]  ? lockdep_hardirqs_on+0x37f/0x560
Jul 15 18:09:29 kaveri kernel: [  560.389165][T12568]  ? __khugepaged_exit+0x2af/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389169][T12568]  ? __khugepaged_exit+0x2af/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389174][T12568]  ? rcu_read_lock_sched_held+0xd8/0x110
Jul 15 18:09:29 kaveri kernel: [  560.389179][T12568]  ? kmem_cache_free+0x279/0x2c0
Jul 15 18:09:29 kaveri kernel: [  560.389185][T12568]  ? __khugepaged_exit+0x2be/0x3e0
Jul 15 18:09:29 kaveri kernel: [  560.389192][T12568]  mmput+0xb2/0x390
Jul 15 18:09:29 kaveri kernel: [  560.389199][T12568]  do_exit+0x880/0x2a70
Jul 15 18:09:29 kaveri kernel: [  560.389207][T12568]  ? find_held_lock+0x33/0x1c0
Jul 15 18:09:29 kaveri kernel: [  560.389213][T12568]  ? mm_update_next_owner+0x5d0/0x5d0
Jul 15 18:09:29 kaveri kernel: [  560.389218][T12568]  ? __do_page_fault+0x41d/0xa20
Jul 15 18:09:29 kaveri kernel: [  560.389226][T12568]  ? lock_downgrade+0x620/0x620
Jul 15 18:09:29 kaveri kernel: [  560.389232][T12568]  ? handle_mm_fault+0x4ab/0x6a0
Jul 15 18:09:29 kaveri kernel: [  560.389242][T12568]  do_group_exit+0xf0/0x2e0
Jul 15 18:09:29 kaveri kernel: [  560.389249][T12568]  __x64_sys_exit_group+0x3a/0x50
Jul 15 18:09:29 kaveri kernel: [  560.389255][T12568]  do_syscall_64+0x9c/0x430
Jul 15 18:09:29 kaveri kernel: [  560.389261][T12568]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Jul 15 18:09:29 kaveri kernel: [  560.389266][T12568] RIP: 0033:0x7fc23d8ed9d6
Jul 15 18:09:29 kaveri kernel: [  560.389271][T12568] Code: 00 4c 8b 0d bc 44 0f 00 eb 19 66 2e 0f 1f 84 00 00 00 00 00 89 d7 89 f0 0f 05 48 3d 00 f0 ff ff 77 22 f4 89 d7 44 89 c0 0f 05 <48> 3d 00 f0 ff ff 76 e2 f7 d8 64 41 89 01 eb da 66 2e 0f 1f 84 00
Jul 15 18:09:29 kaveri kernel: [  560.389275][T12568] RSP: 002b:7fff8c3bcfa8 EFLAGS: 0246 ORIG_RAX: 00e7
Jul 15 18:09:29 kaveri kernel: [  560.389280][T12568] RAX: ffda RBX: 7fc23d9de760 RCX: 7fc23d8ed9d6
Jul 15 18:09:29 kaveri kernel: [  560.389283][T12568] RDX:  RSI: 003c RDI: 
Jul 15 18:09:29 kaveri kernel: [  560.389287][T12568] RBP:  R08: 00e7 R09: ff48
Jul 15 18:09:29 kaveri kernel: [  560.389290][T12568] R10:  R11: 0246 R12: 7fc23d9de760
Jul 15 18:09:29 kaveri kernel: [  560.389293][T12568] R13: 04f0 R14: 7fc23d9e7428 R15: 
Jul 15 18:09:29 kaveri kernel: [  560.389306][T12568] 
Jul 15 18:09:29 kaveri kernel: [  560.389309][T12568] Allocated by task 12568:
Jul

Re: [PATCH] drm/amdgpu: replace simple_strtol() by kstrtou32()

2019-07-15 Thread Alex Deucher
On Mon, Jul 15, 2019 at 10:57 AM  wrote:
>
> From: Wang Xiayang 
>
> The simple_strtol() function is deprecated. kstrto[l,u32]() is
> the correct replacement as it can properly handle overflows.
>
> This patch replaces the deprecated simple_strtol() use introduced recently.
> As clk is of type uint32_t, we are safe to use kstrtou32().
>
> It is also safe to return zero on string parsing error,
> similar to the case of returning zero if buf is empty in parse_clk().
>
> Fixes: bb5a2bdf36a8 ("drm/amdgpu: support dpm level modification under 
> virtualization v3")
> Signed-off-by: Wang Xiayang 

Applied.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 7d484fad3909..2adda47def64 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -386,7 +386,8 @@ static uint32_t parse_clk(char *buf, bool min)
>  if (!ptr)
>  break;
>  ptr+=2;
> -clk = simple_strtoul(ptr, NULL, 10);
> +   if (kstrtou32(ptr, 10, &clk))
> +   return 0;
>  } while (!min);
>
>  return clk * 100;
> --
> 2.11.0
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v3 05/24] drm/amdgpu: remove memset after kzalloc

2019-07-15 Thread Alex Deucher
On Mon, Jul 15, 2019 at 3:57 AM Fuqian Huang  wrote:
>
> kzalloc has already zeroed the memory during the allocation.
> So memset is unneeded.
>
> Signed-off-by: Fuqian Huang 

Applied. thanks!

Alex

> ---
> Changes in v3:
>   - Fix subject prefix: gpu/drm -> drm/amdgpu
>
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c   | 2 --
>  drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c | 2 --
>  drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c| 2 --
>  drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c   | 2 --
>  drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 2 --
>  5 files changed, 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
> index fd22b4474dbf..4e6da61d1a93 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
> @@ -279,8 +279,6 @@ void *amdgpu_dm_irq_register_interrupt(struct 
> amdgpu_device *adev,
> return DAL_INVALID_IRQ_HANDLER_IDX;
> }
>
> -   memset(handler_data, 0, sizeof(*handler_data));
> -
> init_handler_common_data(handler_data, ih, handler_args, &adev->dm);
>
> irq_source = int_params->irq_source;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
> index 1cd5a8b5cdc1..b760f95e7fa7 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
> @@ -1067,8 +1067,6 @@ static int pp_tables_v1_0_initialize(struct pp_hwmgr 
> *hwmgr)
> PP_ASSERT_WITH_CODE((NULL != hwmgr->pptable),
> "Failed to allocate hwmgr->pptable!", return 
> -ENOMEM);
>
> -   memset(hwmgr->pptable, 0x00, sizeof(struct phm_ppt_v1_information));
> -
> powerplay_table = get_powerplay_table(hwmgr);
>
> PP_ASSERT_WITH_CODE((NULL != powerplay_table),
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> index 669bd0c2a16c..d55e264c5df5 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> @@ -2702,8 +2702,6 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr 
> *hwmgr)
> cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, 
> cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2));
> cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, 
> cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2));
>
> -   memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table));
> -
> result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table);
>
> if (0 == result)
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> index 375ccf6ff5f2..c123b4d9c621 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> @@ -2631,8 +2631,6 @@ static int iceland_initialize_mc_reg_table(struct 
> pp_hwmgr *hwmgr)
> cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, 
> cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2));
> cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, 
> cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2));
>
> -   memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table));
> -
> result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table);
>
> if (0 == result)
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> index 3ed6c5f1e5cf..60462c7211e3 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> @@ -3114,8 +3114,6 @@ static int tonga_initialize_mc_reg_table(struct 
> pp_hwmgr *hwmgr)
> cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP,
> cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2));
>
> -   memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table));
> -
> result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table);
>
> if (!result)
> --
> 2.11.0
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Fix unaligned memory copies

2019-07-15 Thread Alex Deucher
On Sat, Jul 13, 2019 at 2:43 AM Kuehling, Felix  wrote:
>
> When starting a new mm_node, the page_offset becomes 0.
>
> Signed-off-by: Felix Kuehling 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 37d9a3b09946..d0f6c23ec7cf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -386,6 +386,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> src_node_start = amdgpu_mm_node_addr(src->bo, 
> ++src_mm,
>  src->mem);
> src_node_size = (src_mm->size << PAGE_SHIFT);
> +   src_page_offset = 0;
> } else {
> src_node_start += cur_size;
> src_page_offset = src_node_start & (PAGE_SIZE - 1);
> @@ -395,6 +396,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> dst_node_start = amdgpu_mm_node_addr(dst->bo, 
> ++dst_mm,
>  dst->mem);
> dst_node_size = (dst_mm->size << PAGE_SHIFT);
> +   dst_page_offset = 0;
> } else {
> dst_node_start += cur_size;
> dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/amdgpu: hide #warning for missing DC config

2019-07-15 Thread Alex Deucher
On Fri, Jul 12, 2019 at 5:41 AM Arnd Bergmann  wrote:
>
> It is annoying to have #warnings that trigger in randconfig
> builds like
>
> drivers/gpu/drm/amd/amdgpu/soc15.c:653:3: error: "Enable CONFIG_DRM_AMD_DC 
> for display support on SOC15."
> drivers/gpu/drm/amd/amdgpu/nv.c:400:3: error: "Enable CONFIG_DRM_AMD_DC for 
> display support on navi."
>
> Remove these and rely on the users to turn these on.
>
> Signed-off-by: Arnd Bergmann 

Applied.  thanks!

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/nv.c| 2 --
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 4 
>  2 files changed, 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index 9253c03d387a..10ec0e81ee58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -396,8 +396,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
>  #if defined(CONFIG_DRM_AMD_DC)
> else if (amdgpu_device_has_dc_support(adev))
> amdgpu_device_ip_block_add(adev, &dm_ip_block);
> -#else
> -#  warning "Enable CONFIG_DRM_AMD_DC for display support on navi."
>  #endif
> amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
> amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 87152d8ef0df..90fb0149fbea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -649,8 +649,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
>  #if defined(CONFIG_DRM_AMD_DC)
> else if (amdgpu_device_has_dc_support(adev))
> amdgpu_device_ip_block_add(adev, &dm_ip_block);
> -#else
> -#  warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
>  #endif
> if (!(adev->asic_type == CHIP_VEGA20 && 
> amdgpu_sriov_vf(adev))) {
> amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
> @@ -671,8 +669,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
>  #if defined(CONFIG_DRM_AMD_DC)
> else if (amdgpu_device_has_dc_support(adev))
> amdgpu_device_ip_block_add(adev, &dm_ip_block);
> -#else
> -#  warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
>  #endif
> amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
> break;
> --
> 2.20.0
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr

2019-07-15 Thread Andrey Konovalov
On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas  wrote:
>
> On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > mlx4_get_umem_mr() uses provided user pointers for vma lookups, which can
> > only be done with untagged pointers.
> >
> > Untag user pointers in this function.
> >
> > Signed-off-by: Andrey Konovalov 
> > ---
> >  drivers/infiniband/hw/mlx4/mr.c | 7 ---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
>
> Acked-by: Catalin Marinas 
>
> This patch also needs an ack from the infiniband maintainers (Jason).

Hi Jason,

Could you take a look and give your acked-by?

Thanks!

>
> --
> Catalin


Re: [PATCH v18 08/15] userfaultfd: untag user pointers

2019-07-15 Thread Andrey Konovalov
On Mon, Jun 24, 2019 at 7:51 PM Catalin Marinas  wrote:
>
> On Mon, Jun 24, 2019 at 04:32:53PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > userfaultfd code uses provided user pointers for vma lookups, which can
> > only be done with untagged pointers.
> >
> > Untag user pointers in validate_range().
> >
> > Reviewed-by: Vincenzo Frascino 
> > Reviewed-by: Catalin Marinas 
> > Reviewed-by: Kees Cook 
> > Signed-off-by: Andrey Konovalov 
> > ---
> >  fs/userfaultfd.c | 22 --
> >  1 file changed, 12 insertions(+), 10 deletions(-)
>
> Same here, it needs an ack from Al Viro.

Hi Al,

Could you take a look at this one as well and give your acked-by?

Thanks!

>
> > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > index ae0b8b5f69e6..c2be36a168ca 100644
> > --- a/fs/userfaultfd.c
> > +++ b/fs/userfaultfd.c
> > @@ -1261,21 +1261,23 @@ static __always_inline void wake_userfault(struct 
> > userfaultfd_ctx *ctx,
> >  }
> >
> >  static __always_inline int validate_range(struct mm_struct *mm,
> > -   __u64 start, __u64 len)
> > +   __u64 *start, __u64 len)
> >  {
> >   __u64 task_size = mm->task_size;
> >
> > - if (start & ~PAGE_MASK)
> > + *start = untagged_addr(*start);
> > +
> > + if (*start & ~PAGE_MASK)
> >   return -EINVAL;
> >   if (len & ~PAGE_MASK)
> >   return -EINVAL;
> >   if (!len)
> >   return -EINVAL;
> > - if (start < mmap_min_addr)
> > + if (*start < mmap_min_addr)
> >   return -EINVAL;
> > - if (start >= task_size)
> > + if (*start >= task_size)
> >   return -EINVAL;
> > - if (len > task_size - start)
> > + if (len > task_size - *start)
> >   return -EINVAL;
> >   return 0;
> >  }
> > @@ -1325,7 +1327,7 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> >   goto out;
> >   }
> >
> > - ret = validate_range(mm, uffdio_register.range.start,
> > + ret = validate_range(mm, &uffdio_register.range.start,
> >uffdio_register.range.len);
> >   if (ret)
> >   goto out;
> > @@ -1514,7 +1516,7 @@ static int userfaultfd_unregister(struct 
> > userfaultfd_ctx *ctx,
> >   if (copy_from_user(&uffdio_unregister, buf, 
> > sizeof(uffdio_unregister)))
> >   goto out;
> >
> > - ret = validate_range(mm, uffdio_unregister.start,
> > + ret = validate_range(mm, &uffdio_unregister.start,
> >uffdio_unregister.len);
> >   if (ret)
> >   goto out;
> > @@ -1665,7 +1667,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx 
> > *ctx,
> >   if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
> >   goto out;
> >
> > - ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
> > + ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
> >   if (ret)
> >   goto out;
> >
> > @@ -1705,7 +1707,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx 
> > *ctx,
> >  sizeof(uffdio_copy)-sizeof(__s64)))
> >   goto out;
> >
> > - ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
> > + ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
> >   if (ret)
> >   goto out;
> >   /*
> > @@ -1761,7 +1763,7 @@ static int userfaultfd_zeropage(struct 
> > userfaultfd_ctx *ctx,
> >  sizeof(uffdio_zeropage)-sizeof(__s64)))
> >   goto out;
> >
> > - ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
> > + ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
> >uffdio_zeropage.range.len);
> >   if (ret)
> >   goto out;
> > --
> > 2.22.0.410.gd8fdbe21b5-goog
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH v18 07/15] fs/namespace: untag user pointers in copy_mount_options

2019-07-15 Thread Andrey Konovalov
On Mon, Jun 24, 2019 at 7:50 PM Catalin Marinas  wrote:
>
> On Mon, Jun 24, 2019 at 04:32:52PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > In copy_mount_options a user address is being subtracted from TASK_SIZE.
> > If the address is lower than TASK_SIZE, the size is calculated to not
> > allow the exact_copy_from_user() call to cross TASK_SIZE boundary.
> > However if the address is tagged, then the size will be calculated
> > incorrectly.
> >
> > Untag the address before subtracting.
> >
> > Reviewed-by: Khalid Aziz 
> > Reviewed-by: Vincenzo Frascino 
> > Reviewed-by: Kees Cook 
> > Reviewed-by: Catalin Marinas 
> > Signed-off-by: Andrey Konovalov 
> > ---
> >  fs/namespace.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/fs/namespace.c b/fs/namespace.c
> > index 7660c2749c96..ec78f7223917 100644
> > --- a/fs/namespace.c
> > +++ b/fs/namespace.c
> > @@ -2994,7 +2994,7 @@ void *copy_mount_options(const void __user * data)
> >* the remainder of the page.
> >*/
> >   /* copy_from_user cannot cross TASK_SIZE ! */
> > - size = TASK_SIZE - (unsigned long)data;
> > + size = TASK_SIZE - (unsigned long)untagged_addr(data);
> >   if (size > PAGE_SIZE)
> >   size = PAGE_SIZE;
>
> I think this patch needs an ack from Al Viro (cc'ed).
>
> --
> Catalin

Hi Al,

Could you take a look and give your acked-by?

Thanks!
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 6/7] drm/amd/powerplay: Use proper enums in vega20_print_clk_levels

2019-07-15 Thread Nathan Chancellor
On Mon, Jul 15, 2019 at 11:25:29AM +0200, Arnd Bergmann wrote:
> On Thu, Jul 4, 2019 at 7:52 AM Nathan Chancellor
>  wrote:
> >
> > clang warns:
> >
> > drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:995:39: warning:
> > implicit conversion from enumeration type 'PPCLK_e' to different
> > enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> > ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now);
> >   ~~^~~
> > drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:1016:39: warning:
> > implicit conversion from enumeration type 'PPCLK_e' to different
> > enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> > ret = smu_get_current_clk_freq(smu, PPCLK_FCLK, &now);
> >   ~~^
> > drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:1031:39: warning:
> > implicit conversion from enumeration type 'PPCLK_e' to different
> > enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> > ret = smu_get_current_clk_freq(smu, PPCLK_DCEFCLK, &now);
> >   ~~^~~~
> >
> > The values are mapped one to one in vega20_get_smu_clk_index so just use
> > the proper enums here.
> >
> > Fixes: 096761014227 ("drm/amd/powerplay: support sysfs to get socclk, fclk, 
> > dcefclk")
> > Link: https://github.com/ClangBuiltLinux/linux/issues/587
> > Signed-off-by: Nathan Chancellor 
> > ---
> 
> Adding Kevin Wang for further review, as he sent a related patch in
> d36893362d22 ("drm/amd/powerplay: fix smu clock type change miss error")
> 
> I assume this one is still required as it triggers the same warning.
> Kevin, can you have a look?
> 
>   Arnd

Indeed, this one and https://github.com/ClangBuiltLinux/linux/issues/586
are still outstanding.

https://patchwork.freedesktop.org/patch/315581/

Cheers,
Nathan

> 
> >  drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c 
> > b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> > index 0f14fe14ecd8..e62dd6919b24 100644
> > --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> > +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> > @@ -992,7 +992,7 @@ static int vega20_print_clk_levels(struct smu_context 
> > *smu,
> > break;
> >
> > case SMU_SOCCLK:
> > -   ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now);
> > +   ret = smu_get_current_clk_freq(smu, SMU_SOCCLK, &now);
> > if (ret) {
> > pr_err("Attempt to get current socclk Failed!");
> > return ret;
> > @@ -1013,7 +1013,7 @@ static int vega20_print_clk_levels(struct smu_context 
> > *smu,
> > break;
> >
> > case SMU_FCLK:
> > -   ret = smu_get_current_clk_freq(smu, PPCLK_FCLK, &now);
> > +   ret = smu_get_current_clk_freq(smu, SMU_FCLK, &now);
> > if (ret) {
> > pr_err("Attempt to get current fclk Failed!");
> > return ret;
> > @@ -1028,7 +1028,7 @@ static int vega20_print_clk_levels(struct smu_context 
> > *smu,
> > break;
> >
> > case SMU_DCEFCLK:
> > -   ret = smu_get_current_clk_freq(smu, PPCLK_DCEFCLK, &now);
> > +   ret = smu_get_current_clk_freq(smu, SMU_DCEFCLK, &now);
> > if (ret) {
> > pr_err("Attempt to get current dcefclk Failed!");
> > return ret;


[PATCH] drm/amdgpu: replace simple_strtol() by kstrtou32()

2019-07-15 Thread xywang . sjtu
From: Wang Xiayang 

The simple_strtol() function is deprecated. kstrto[l,u32]() is
the correct replacement as it can properly handle overflows.

This patch replaces the deprecated simple_strtol() use introduced recently.
As clk is of type uint32_t, we are safe to use kstrtou32().

It is also safe to return zero on string parsing error,
similar to the case of returning zero if buf is empty in parse_clk().

Fixes: bb5a2bdf36a8 ("drm/amdgpu: support dpm level modification under 
virtualization v3")
Signed-off-by: Wang Xiayang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7d484fad3909..2adda47def64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -386,7 +386,8 @@ static uint32_t parse_clk(char *buf, bool min)
 if (!ptr)
 break;
 ptr+=2;
-clk = simple_strtoul(ptr, NULL, 10);
+   if (kstrtou32(ptr, 10, &clk))
+   return 0;
 } while (!min);
 
 return clk * 100;
-- 
2.11.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Fix Vega20 Perf counter for pcie_bw

2019-07-15 Thread Deucher, Alexander
Acked-by: Alex Deucher 

From: amd-gfx  on behalf of Russell, 
Kent 
Sent: Monday, July 15, 2019 8:54 AM
To: amd-gfx@lists.freedesktop.org
Cc: Russell, Kent
Subject: [PATCH] drm/amdgpu: Fix Vega20 Perf counter for pcie_bw

The perf counter for Vega20 is 108, instead of 104 which it was on all
previous GPUs, so add a check to use the appropriate value.

Change-Id: Id5b5026a03b09d8b9d52dda85e17ed5acd818912
Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 1e424d918334..852ad0a07995 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -716,9 +716,15 @@ static void soc15_get_pcie_usage(struct amdgpu_device 
*adev, uint64_t *count0,
 return;

 /* Set the 2 events that we wish to watch, defined above */
-   /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+   /* Reg 40 is # received msgs */
 perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
-   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+   /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */
+   if (adev->asic_type == CHIP_VEGA20)
+   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
+   EVENT1_SEL, 108);
+   else
+   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
+   EVENT1_SEL, 104);

 /* Write to enable desired perf counters */
 WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: drm/amd/powerplay: remove redundant memset

2019-07-15 Thread Markus Elfring
> kzalloc has already zeroed the memory.
> So the memset is unneeded.

See also a previous patch:
drm/amd/powerplay: Delete a redundant memory setting in 
vega20_set_default_od8_setttings()
https://lore.kernel.org/lkml/de3f6a5e-8ac4-bc8e-0d0c-3a4a5db28...@web.de/
https://lore.kernel.org/patchwork/patch/1089691/
https://lkml.org/lkml/2019/6/17/460

Regards,
Markus


Re: [PATCH] drm/amd/display: Remove check for 0 kHz clock values

2019-07-15 Thread Paul Menzel
Dear Nicholas,


On 7/15/19 2:57 PM, Kazlauskas, Nicholas wrote:
> On 7/15/19 6:34 AM, Paul Menzel wrote:
>> From 09c1952466752033722b02d9c7e5532e1982f6d9 Mon Sep 17 00:00:00 2001
>> From: Paul Menzel 
>> Date: Sat, 13 Jul 2019 20:33:49 +0200
>>
>> This basically reverts commit 00893681a0ff4 (drm/amd/display: Reject
>> PPLib clock values if they are invalid).
>>
>> 0 kHz values are a thing on at least the boards below.
>>
>> 1.  MSI MS-7A37/B350M MORTAR (MS-7A37), BIOS 1.G1 05/17/2018
>> 2.  MSI B450M Mortar, 2400G on 4.19.8
>> 3.  Gigabyte Technology Co., Ltd. X470 AORUS ULTRA GAMING/X470 AORUS
>>  ULTRA GAMING-CF, BIOS F30 04/16/2019
>>
>> Asserting instead of giving a useful error message to the user, so they
>> can understand what is going on and how to possibly fix things, might be
>> good for development, but is a bad user experience, so should not be on
>> production systems. So, remove the check for now.
>>
>> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=107296
>> Tested: MSI MS-7A37/B350M MORTAR (MS-7A37)
>> Signed-off-by: Paul Menzel 
> 
> The two assertions should probably just be replaced with 
> DC_LOG_DEBUG(...) instead - this will drop the callstack on boot for 
> production systems.
> 
> Dropping the whole validation also means that we're going to be taking 
> the table as-is and overriding the defaults - which isn't something we'd 
> actually want to do.
> 
> I do think it's fine to just reduce this to a debug message since you'd 
> see this on any 2400G/AM4 (as far as I'm aware), and only for the fCLK 
> table (the tables always come from PPLIB/SMU).

Where can I find more information on this for 2400G/AM4? What about
2200(?) and so on?

If it’s expected that there are 0 kHz values in there, the code should
deal with that and filter them out. I do not understand why the whole
table is invalidated and default values are used instead. That sounds
buggy.


Kind regards,

Paul


>> ---
>>   drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 5 -
>>   1 file changed, 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 
>> b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
>> index 1b4b51657f5e..edaaae5754fe 100644
>> --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
>> +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
>> @@ -1362,11 +1362,6 @@ static bool verify_clock_values(struct 
>> dm_pp_clock_levels_with_voltage *clks)
>>  if (clks->num_levels == 0)
>>  return false;
>>   
>> -for (i = 0; i < clks->num_levels; i++)
>> -/* Ensure that the result is sane */
>> -if (clks->data[i].clocks_in_khz == 0)
>> -return false;
>> -
>>  return true;
>>   }



smime.p7s
Description: S/MIME Cryptographic Signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amd/display: Remove check for 0 kHz clock values

2019-07-15 Thread Kazlauskas, Nicholas
On 7/15/19 6:34 AM, Paul Menzel wrote:
>  From 09c1952466752033722b02d9c7e5532e1982f6d9 Mon Sep 17 00:00:00 2001
> From: Paul Menzel 
> Date: Sat, 13 Jul 2019 20:33:49 +0200
> 
> This basically reverts commit 00893681a0ff4 (drm/amd/display: Reject
> PPLib clock values if they are invalid).
> 
> 0 kHz values are a thing on at least the boards below.
> 
> 1.  MSI MS-7A37/B350M MORTAR (MS-7A37), BIOS 1.G1 05/17/2018
> 2.  MSI B450M Mortar, 2400G on 4.19.8
> 3.  Gigabyte Technology Co., Ltd. X470 AORUS ULTRA GAMING/X470 AORUS
>  ULTRA GAMING-CF, BIOS F30 04/16/2019
> 
> Asserting instead of giving a useful error message to the user, so they
> can understand what is going on and how to possibly fix things, might be
> good for development, but is a bad user experience, so should not be on
> production systems. So, remove the check for now.
> 
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=107296
> Tested: MSI MS-7A37/B350M MORTAR (MS-7A37)
> Signed-off-by: Paul Menzel 

The two assertions should probably just be replaced with 
DC_LOG_DEBUG(...) instead - this will drop the callstack on boot for 
production systems.

Dropping the whole validation also means that we're going to be taking 
the table as-is and overriding the defaults - which isn't something we'd 
actually want to do.

I do think it's fine to just reduce this to a debug message since you'd 
see this on any 2400G/AM4 (as far as I'm aware), and only for the fCLK 
table (the tables always come from PPLIB/SMU).

Nicholas Kazlauskas

> ---
>   drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 5 -
>   1 file changed, 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 
> b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> index 1b4b51657f5e..edaaae5754fe 100644
> --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> @@ -1362,11 +1362,6 @@ static bool verify_clock_values(struct 
> dm_pp_clock_levels_with_voltage *clks)
>   if (clks->num_levels == 0)
>   return false;
>   
> - for (i = 0; i < clks->num_levels; i++)
> - /* Ensure that the result is sane */
> - if (clks->data[i].clocks_in_khz == 0)
> - return false;
> -
>   return true;
>   }
>   
> 
> 
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Fix Vega20 Perf counter for pcie_bw

2019-07-15 Thread Russell, Kent
The perf counter for Vega20 is 108, instead of 104 which it was on all
previous GPUs, so add a check to use the appropriate value.

Change-Id: Id5b5026a03b09d8b9d52dda85e17ed5acd818912
Signed-off-by: Kent Russell 
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 1e424d918334..852ad0a07995 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -716,9 +716,15 @@ static void soc15_get_pcie_usage(struct amdgpu_device 
*adev, uint64_t *count0,
return;
 
/* Set the 2 events that we wish to watch, defined above */
-   /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+   /* Reg 40 is # received msgs */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
-   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+   /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */
+   if (adev->asic_type == CHIP_VEGA20)
+   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
+   EVENT1_SEL, 108);
+   else
+   perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
+   EVENT1_SEL, 104);
 
/* Write to enable desired perf counters */
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: extend AMDGPU_CTX_PRIORITY_NORMAL comment

2019-07-15 Thread Christian König

Am 02.07.19 um 19:15 schrieb Emil Velikov:

On Fri, 14 Jun 2019 at 19:02, Koenig, Christian
 wrote:

Am 14.06.19 um 19:33 schrieb Emil Velikov:

From: Emil Velikov 

Currently the AMDGPU_CTX_PRIORITY_* defines are used in both
drm_amdgpu_ctx_in::priority and drm_amdgpu_sched_in::priority.

Extend the comment to mention the CAP_SYS_NICE or DRM_MASTER requirement
is only applicable with the former.

Cc: Bas Nieuwenhuizen 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Emil Velikov 
---
Mildly curious: why didn't one extend ctx_amdgpu_ctx instead of adding
drm_amdgpu_sched? New flag + _u32 fd at the end (for the former) would
have been enough (and tweaking the ioctl permission thingy).

The drm_amdgpu_sched is only allowed for DRM_MASTER.


Fair enough.

Is the patch wrong or did it slip through the cracks? I cannot see it
in Alex's tree.


Looks like Alex just missed this one and I was on vacation/out of office 
for a while.


I've gone ahead added my rb and just pushed it.

Thanks,
Christian.



-Emil
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amd/powerplay: remove redundant memset

2019-07-15 Thread Fuqian Huang
kzalloc has already zeroed the memory.
So the memset is unneeded.

Signed-off-by: Fuqian Huang 
---
 drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c 
b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 8fafcbdb1dfd..0fb6066997b2 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -1295,7 +1295,6 @@ static int vega20_set_default_od8_setttings(struct 
smu_context *smu)
if (!table_context->od8_settings)
return -ENOMEM;
 
-   memset(table_context->od8_settings, 0, sizeof(struct 
vega20_od8_settings));
od8_settings = (struct vega20_od8_settings 
*)table_context->od8_settings;
 
if (smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT)) {
-- 
2.11.0



[PATCH v2 6/6] drm/amdgpu: utilize subconnector property for DP

2019-07-15 Thread Oleg Vasilev
Since DP-specific information is stored in driver's structures, every
driver needs to implement subconnector property by itself.

Signed-off-by: Oleg Vasilev 
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/atombios_dp.c   | 18 +-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 73b2ede773d3..099286467c82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -26,6 +26,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "amdgpu.h"
@@ -1407,6 +1408,10 @@ amdgpu_connector_dp_detect(struct drm_connector 
*connector, bool force)
pm_runtime_put_autosuspend(connector->dev->dev);
}
 
+   drm_dp_set_subconnector_property(&amdgpu_connector->base,
+ret,
+amdgpu_dig_connector->dpcd,
+
amdgpu_dig_connector->downstream_ports);
return ret;
 }
 
@@ -1567,6 +1572,13 @@ amdgpu_connector_add(struct amdgpu_device *adev,
DRM_ERROR("Failed to assign router i2c bus! Check dmesg 
for i2c errors.\n");
}
 
+   if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+  (connector->connector_type == DRM_MODE_CONNECTOR_eDP))
+   {
+   drm_mode_add_dp_subconnector_property(&amdgpu_connector->base);
+   }
+
+
if (is_dp_bridge) {
amdgpu_dig_connector = kzalloc(sizeof(struct 
amdgpu_connector_atom_dig), GFP_KERNEL);
if (!amdgpu_dig_connector)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index eb9975f4decb..cb360b44371c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -469,6 +469,7 @@ struct amdgpu_encoder {
 struct amdgpu_connector_atom_dig {
/* displayport */
u8 dpcd[DP_RECEIVER_CAP_SIZE];
+   u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
u8 dp_sink_type;
int dp_clock;
int dp_lane_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c 
b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 6858cde9fc5d..b0d414553e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -334,6 +334,22 @@ static void amdgpu_atombios_dp_probe_oui(struct 
amdgpu_connector *amdgpu_connect
  buf[0], buf[1], buf[2]);
 }
 
+static void amdgpu_atombios_dp_ds_ports(struct amdgpu_connector 
*amdgpu_connector)
+{
+   struct amdgpu_connector_atom_dig *dig_connector = 
amdgpu_connector->con_priv;
+   int ret;
+
+   if (dig_connector->dpcd[DP_DPCD_REV] > 0x10) {
+   ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux,
+  DP_DOWNSTREAM_PORT_0,
+  dig_connector->downstream_ports,
+  DP_MAX_DOWNSTREAM_PORTS);
+   if (ret)
+   memset(dig_connector->downstream_ports, 0,
+  DP_MAX_DOWNSTREAM_PORTS);
+   }
+}
+
 int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector)
 {
struct amdgpu_connector_atom_dig *dig_connector = 
amdgpu_connector->con_priv;
@@ -349,7 +365,7 @@ int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector 
*amdgpu_connector)
  dig_connector->dpcd);
 
amdgpu_atombios_dp_probe_oui(amdgpu_connector);
-
+   amdgpu_atombios_dp_ds_ports(amdgpu_connector);
return 0;
}
 
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: Fix the memory delay free issue

2019-07-15 Thread Koenig, Christian
Hi Emily,

no, we can only cleanup the current one because we don't have a 
reference to the other ones.

At least that's how I understand your question,
Christian.

Am 15.07.19 um 12:47 schrieb Deng, Emily:
> Hi Christian,
>   Do you think we could free all those bos those are in current destroy 
> list when the current resv is signal in ttm_bo_cleanup_refs?
>
> Best wishes
> Emily Deng
>
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: Monday, July 15, 2019 5:41 PM
>> To: Deng, Emily ; Zhou, David(ChunMing)
>> 
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>
>>> Do you think we don't need to fix it?
>> No, when the application is exhausting memory then we can't expect anything
>> else here.
>>
>> See memory freeing is always delayed until it isn't used any more or when the
>> process is killed after access is prevented (by clearing page tables for 
>> example).
>>
>> What we could do is maybe look into why we don't block until the memory is
>> freed during command submission, but apart from that this sounds like
>> perfectly expected behavior.
>>
>> Regards,
>> Christian.
>>
>> Am 15.07.19 um 11:34 schrieb Deng, Emily:
>>> Hi Christian,
>>>   As has this behavior, when test vulkan cts allocation test, it will
>> exhausting the memory, and cause out of memory. Do you think we don't
>> need to fix it?
>>> Best wishes
>>> Emily Deng
 -Original Message-
 From: Koenig, Christian 
 Sent: Monday, July 15, 2019 5:31 PM
 To: Deng, Emily ; Zhou, David(ChunMing)
 
 Cc: amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

 Hi guys,

> Do you have any suggestion about this? For per vm bo, it seems
> always
 delay to free the ttm bo.
 Yeah, and that is correct behavior.

 Since we don't know who is using a per-vm BO we need to wait for all
 command submissions in flight when it is freed.

 For this we copy the current state of the shared reservation object
 into the private one in ttm_bo_individualize_resv.

 Regards,
 Christian.

 Am 15.07.19 um 08:49 schrieb Deng, Emily:
> Hi David,
> You are right, it will copy per-vm resv.
> But currently, it still has the delay free issue which non
> per vm bo doesn't
 has. Maybe it already has new fences append to this resv object before
>> copy.
> Hi Christian,
>Do you have any suggestion about this? For per vm bo, it seems
> always
 delay to free the ttm bo.
> Best wishes
> Emily Deng
>> -Original Message-
>> From: Zhou, David(ChunMing) 
>> Sent: Wednesday, July 10, 2019 9:28 PM
>> To: Deng, Emily ; amd-
>> g...@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>
>> It doesn't make sense that freeing BO still uses per-vm resv.
>>
>> I remember when BO is in release list, its resv will be from per-vm resv
>> copy.
>> Could you check it?
>>
>> -David
>>
>> 在 2019/7/10 17:29, Emily Deng 写道:
>>> For vulkan cts allocation test cases, they will create a series of
>>> bos, and then free them. As it has lots of alloction test cases
>>> with the same vm, as per vm bo feature enable, all of those bos'
>>> resv are the same. But the bo free is quite slow, as they use the
>>> same resv object, for every time, free a bo, it will check the
>>> resv whether signal, if it signal, then will free it. But as the
>>> test cases will continue to create bo, and the resv fence is
>>> increasing. So the free is more
>> slower than creating. It will cause memory exhausting.
>>> Method:
>>> When the resv signal, release all the bos which are use the same
>>> resv object.
>>>
>>> Signed-off-by: Emily Deng 
>>> ---
>>>  drivers/gpu/drm/ttm/ttm_bo.c | 29 
>> -
>>>  1 file changed, 24 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct
>> ttm_buffer_object *bo,
>>>  {
>>> struct ttm_bo_global *glob = bo->bdev->glob;
>>> struct reservation_object *resv;
>>> +   struct ttm_buffer_object *resv_bo, *resv_bo_next;
>>> int ret;
>>>
>>> if (unlikely(list_empty(&bo->ddestroy)))
>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct
>> ttm_buffer_object *bo,
>>>
>>> interruptible,
>>>30 * HZ);
>>>
>>> -   if (lret < 0)
>>> +   

RE: [PATCH] drm/ttm: Fix the memory delay free issue

2019-07-15 Thread Deng, Emily
Hi Christian,
 Do you think we could free all those bos that are in the current destroy list 
when the current resv is signaled in ttm_bo_cleanup_refs?

Best wishes
Emily Deng

>-Original Message-
>From: Koenig, Christian 
>Sent: Monday, July 15, 2019 5:41 PM
>To: Deng, Emily ; Zhou, David(ChunMing)
>
>Cc: amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>
>> Do you think we don't need to fix it?
>No, when the application is exhausting memory then we can't expect anything
>else here.
>
>See memory freeing is always delayed until it isn't used any more or when the
>process is killed after access is prevented (by clearing page tables for 
>example).
>
>What we could do is maybe look into why we don't block until the memory is
>freed during command submission, but apart from that this sounds like
>perfectly expected behavior.
>
>Regards,
>Christian.
>
>Am 15.07.19 um 11:34 schrieb Deng, Emily:
>> Hi Christian,
>>  As has this behavior, when test vulkan cts allocation test, it will
>exhausting the memory, and cause out of memory. Do you think we don't
>need to fix it?
>>
>> Best wishes
>> Emily Deng
>>> -Original Message-
>>> From: Koenig, Christian 
>>> Sent: Monday, July 15, 2019 5:31 PM
>>> To: Deng, Emily ; Zhou, David(ChunMing)
>>> 
>>> Cc: amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>>
>>> Hi guys,
>>>
 Do you have any suggestion about this? For per vm bo, it seems
 always
>>> delay to free the ttm bo.
>>> Yeah, and that is correct behavior.
>>>
>>> Since we don't know who is using a per-vm BO we need to wait for all
>>> command submissions in flight when it is freed.
>>>
>>> For this we copy the current state of the shared reservation object
>>> into the private one in ttm_bo_individualize_resv.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 15.07.19 um 08:49 schrieb Deng, Emily:
 Hi David,
You are right, it will copy per-vm resv.
But currently, it still has the delay free issue which non
 per vm bo doesn't
>>> has. Maybe it already has new fences append to this resv object before
>copy.
 Hi Christian,
   Do you have any suggestion about this? For per vm bo, it seems
 always
>>> delay to free the ttm bo.
 Best wishes
 Emily Deng
> -Original Message-
> From: Zhou, David(ChunMing) 
> Sent: Wednesday, July 10, 2019 9:28 PM
> To: Deng, Emily ; amd-
>g...@lists.freedesktop.org
> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>
> It doesn't make sense that freeing BO still uses per-vm resv.
>
> I remember when BO is in release list, its resv will be from per-vm resv
>copy.
> Could you check it?
>
> -David
>
> 在 2019/7/10 17:29, Emily Deng 写道:
>> For vulkan cts allocation test cases, they will create a series of
>> bos, and then free them. As it has lots of alloction test cases
>> with the same vm, as per vm bo feature enable, all of those bos'
>> resv are the same. But the bo free is quite slow, as they use the
>> same resv object, for every time, free a bo, it will check the
>> resv whether signal, if it signal, then will free it. But as the
>> test cases will continue to create bo, and the resv fence is
>> increasing. So the free is more
> slower than creating. It will cause memory exhausting.
>> Method:
>> When the resv signal, release all the bos which are use the same
>> resv object.
>>
>> Signed-off-by: Emily Deng 
>> ---
>> drivers/gpu/drm/ttm/ttm_bo.c | 29 
>-
>> 1 file changed, 24 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct
> ttm_buffer_object *bo,
>> {
>>  struct ttm_bo_global *glob = bo->bdev->glob;
>>  struct reservation_object *resv;
>> +struct ttm_buffer_object *resv_bo, *resv_bo_next;
>>  int ret;
>>
>>  if (unlikely(list_empty(&bo->ddestroy)))
>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct
> ttm_buffer_object *bo,
>> 
>> interruptible,
>> 30 * HZ);
>>
>> -if (lret < 0)
>> +if (lret < 0) {
>> +kref_put(&bo->list_kref, ttm_bo_release_list);
>>  return lret;
>> -else if (lret == 0)
>> +}
>> +else if (lret == 0) {
>> +kref_put(&bo->list_kref, ttm_bo_release_list);
>> 

Re: [PATCH v3 05/24] drm/amdgpu: remove memset after kzalloc

2019-07-15 Thread Emil Velikov
On 2019/07/15, Fuqian Huang wrote:
> kzalloc has already zeroed the memory during the allocation.
> So memset is unneeded.
> 
> Signed-off-by: Fuqian Huang 
> ---
> Changes in v3:
>   - Fix subject prefix: gpu/drm -> drm/amdgpu
> 
Reviewed-by: Emil Velikov 

-Emil



[PATCH] drm/amd/display: Remove check for 0 kHz clock values

2019-07-15 Thread Paul Menzel
From 09c1952466752033722b02d9c7e5532e1982f6d9 Mon Sep 17 00:00:00 2001
From: Paul Menzel 
Date: Sat, 13 Jul 2019 20:33:49 +0200

This basically reverts commit 00893681a0ff4 (drm/amd/display: Reject 
PPLib clock values if they are invalid).

0 kHz values are a thing on at least the boards below.

1.  MSI MS-7A37/B350M MORTAR (MS-7A37), BIOS 1.G1 05/17/2018
2.  MSI B450M Mortar, 2400G on 4.19.8
3.  Gigabyte Technology Co., Ltd. X470 AORUS ULTRA GAMING/X470 AORUS
ULTRA GAMING-CF, BIOS F30 04/16/2019

Asserting instead of giving a useful error message to the user, so they
can understand what is going on and how to possibly fix things, might be
good for development, but is a bad user experience, so should not be on
production systems. So, remove the check for now.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=107296
Tested: MSI MS-7A37/B350M MORTAR (MS-7A37)
Signed-off-by: Paul Menzel 
---
 drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 
b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 1b4b51657f5e..edaaae5754fe 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -1362,11 +1362,6 @@ static bool verify_clock_values(struct 
dm_pp_clock_levels_with_voltage *clks)
if (clks->num_levels == 0)
return false;
 
-   for (i = 0; i < clks->num_levels; i++)
-   /* Ensure that the result is sane */
-   if (clks->data[i].clocks_in_khz == 0)
-   return false;
-
return true;
 }
 
-- 
2.22.0



smime.p7s
Description: S/MIME Cryptographic Signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/ttm: Fix the memory delay free issue

2019-07-15 Thread Koenig, Christian
> Do you think we don't need to fix it?
No, when the application is exhausting memory then we can't expect 
anything else here.

See memory freeing is always delayed until it isn't used any more or 
when the process is killed after access is prevented (by clearing page 
tables for example).

What we could do is maybe look into why we don't block until the memory 
is freed during command submission, but apart from that this sounds like 
perfectly expected behavior.

Regards,
Christian.

Am 15.07.19 um 11:34 schrieb Deng, Emily:
> Hi Christian,
>  As has this behavior, when test vulkan cts allocation test, it will 
> exhausting the memory, and cause out of memory. Do you think we don't need to 
> fix it?
>
> Best wishes
> Emily Deng
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: Monday, July 15, 2019 5:31 PM
>> To: Deng, Emily ; Zhou, David(ChunMing)
>> 
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>
>> Hi guys,
>>
>>> Do you have any suggestion about this? For per vm bo, it seems always
>> delay to free the ttm bo.
>> Yeah, and that is correct behavior.
>>
>> Since we don't know who is using a per-vm BO we need to wait for all
>> command submissions in flight when it is freed.
>>
>> For this we copy the current state of the shared reservation object into the
>> private one in ttm_bo_individualize_resv.
>>
>> Regards,
>> Christian.
>>
>> Am 15.07.19 um 08:49 schrieb Deng, Emily:
>>> Hi David,
>>>You are right, it will copy per-vm resv.
>>>But currently, it still has the delay free issue which non per vm bo 
>>> doesn't
>> has. Maybe it already has new fences append to this resv object before copy.
>>> Hi Christian,
>>>   Do you have any suggestion about this? For per vm bo, it seems always
>> delay to free the ttm bo.
>>> Best wishes
>>> Emily Deng
 -Original Message-
 From: Zhou, David(ChunMing) 
 Sent: Wednesday, July 10, 2019 9:28 PM
 To: Deng, Emily ; amd-gfx@lists.freedesktop.org
 Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue

 It doesn't make sense that freeing BO still uses per-vm resv.

 I remember when BO is in release list, its resv will be from per-vm resv 
 copy.
 Could you check it?

 -David

 在 2019/7/10 17:29, Emily Deng 写道:
> For vulkan cts allocation test cases, they will create a series of
> bos, and then free them. As it has lots of alloction test cases with
> the same vm, as per vm bo feature enable, all of those bos' resv are
> the same. But the bo free is quite slow, as they use the same resv
> object, for every time, free a bo, it will check the resv whether
> signal, if it signal, then will free it. But as the test cases will
> continue to create bo, and the resv fence is increasing. So the free
> is more
 slower than creating. It will cause memory exhausting.
> Method:
> When the resv signal, release all the bos which are use the same
> resv object.
>
> Signed-off-by: Emily Deng 
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 29 -
> 1 file changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct
 ttm_buffer_object *bo,
> {
>   struct ttm_bo_global *glob = bo->bdev->glob;
>   struct reservation_object *resv;
> + struct ttm_buffer_object *resv_bo, *resv_bo_next;
>   int ret;
>
>   if (unlikely(list_empty(&bo->ddestroy)))
> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct
 ttm_buffer_object *bo,
>  
> interruptible,
>  30 * HZ);
>
> - if (lret < 0)
> + if (lret < 0) {
> + kref_put(&bo->list_kref, ttm_bo_release_list);
>   return lret;
> - else if (lret == 0)
> + }
> + else if (lret == 0) {
> + kref_put(&bo->list_kref, ttm_bo_release_list);
>   return -EBUSY;
> + }
>
>   spin_lock(&glob->lru_lock);
>   if (unlock_resv && 
> !kcl_reservation_object_trylock(bo->resv))
 { @@
> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct
> ttm_buffer_object
 *bo,
>* here.
>*/
>   spin_unlock(&glob->lru_lock);
> + kref_put(&bo->list_kref, ttm_bo_release_list);
>   return 0;
>

RE: [PATCH] drm/ttm: Fix the memory delay free issue

2019-07-15 Thread Deng, Emily
Hi Christian,
Given this behavior, when running the Vulkan CTS allocation test, it will 
exhaust the memory and cause an out-of-memory condition. Do you think we don't need to 
fix it?

Best wishes
Emily Deng
>-Original Message-
>From: Koenig, Christian 
>Sent: Monday, July 15, 2019 5:31 PM
>To: Deng, Emily ; Zhou, David(ChunMing)
>
>Cc: amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>
>Hi guys,
>
>> Do you have any suggestion about this? For per vm bo, it seems always
>delay to free the ttm bo.
>Yeah, and that is correct behavior.
>
>Since we don't know who is using a per-vm BO we need to wait for all
>command submissions in flight when it is freed.
>
>For this we copy the current state of the shared reservation object into the
>private one in ttm_bo_individualize_resv.
>
>Regards,
>Christian.
>
>Am 15.07.19 um 08:49 schrieb Deng, Emily:
>> Hi David,
>>   You are right, it will copy per-vm resv.
>>   But currently, it still has the delay free issue which non per vm bo 
>> doesn't
>has. Maybe it already has new fences append to this resv object before copy.
>>
>> Hi Christian,
>>  Do you have any suggestion about this? For per vm bo, it seems always
>delay to free the ttm bo.
>>
>> Best wishes
>> Emily Deng
>>> -Original Message-
>>> From: Zhou, David(ChunMing) 
>>> Sent: Wednesday, July 10, 2019 9:28 PM
>>> To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>>
>>> It doesn't make sense that freeing BO still uses per-vm resv.
>>>
>>> I remember when BO is in release list, its resv will be from per-vm resv 
>>> copy.
>>> Could you check it?
>>>
>>> -David
>>>
>>> 在 2019/7/10 17:29, Emily Deng 写道:
 For vulkan cts allocation test cases, they will create a series of
 bos, and then free them. As it has lots of alloction test cases with
 the same vm, as per vm bo feature enable, all of those bos' resv are
 the same. But the bo free is quite slow, as they use the same resv
 object, for every time, free a bo, it will check the resv whether
 signal, if it signal, then will free it. But as the test cases will
 continue to create bo, and the resv fence is increasing. So the free
 is more
>>> slower than creating. It will cause memory exhausting.
 Method:
 When the resv signal, release all the bos which are use the same
 resv object.

 Signed-off-by: Emily Deng 
 ---
drivers/gpu/drm/ttm/ttm_bo.c | 29 -
1 file changed, 24 insertions(+), 5 deletions(-)

 diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
 b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644
 --- a/drivers/gpu/drm/ttm/ttm_bo.c
 +++ b/drivers/gpu/drm/ttm/ttm_bo.c
 @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct
>>> ttm_buffer_object *bo,
{
struct ttm_bo_global *glob = bo->bdev->glob;
struct reservation_object *resv;
 +  struct ttm_buffer_object *resv_bo, *resv_bo_next;
int ret;

if (unlikely(list_empty(&bo->ddestroy)))
 @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct
>>> ttm_buffer_object *bo,
   
 interruptible,
   30 * HZ);

 -  if (lret < 0)
 +  if (lret < 0) {
 +  kref_put(&bo->list_kref, ttm_bo_release_list);
return lret;
 -  else if (lret == 0)
 +  }
 +  else if (lret == 0) {
 +  kref_put(&bo->list_kref, ttm_bo_release_list);
return -EBUSY;
 +  }

spin_lock(&glob->lru_lock);
if (unlock_resv && 
 !kcl_reservation_object_trylock(bo->resv))
>>> { @@
 -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct
 ttm_buffer_object
>>> *bo,
 * here.
 */
spin_unlock(&glob->lru_lock);
 +  kref_put(&bo->list_kref, ttm_bo_release_list);
return 0;
}
ret = 0;
 @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct
>>> ttm_buffer_object *bo,
if (unlock_resv)
kcl_reservation_object_unlock(bo->resv);
spin_unlock(&glob->lru_lock);
 +  kref_put(&bo->list_kref, ttm_bo_release_list);
return ret;
}

ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
kref_put(&bo->list_kref, ttm_bo_ref_bug);
 -
spin_unlock(&glob->lru_lock);
ttm_bo_cleanu

Re: [PATCH] drm/ttm: Fix the memory delay free issue

2019-07-15 Thread Koenig, Christian
Hi guys,

> Do you have any suggestion about this? For per vm bo, it seems always delay 
> to free the ttm bo.
Yeah, and that is correct behavior.

Since we don't know who is using a per-vm BO we need to wait for all 
command submissions in flight when it is freed.

For this we copy the current state of the shared reservation object into 
the private one in ttm_bo_individualize_resv.

Regards,
Christian.

Am 15.07.19 um 08:49 schrieb Deng, Emily:
> Hi David,
>   You are right, it will copy per-vm resv.
>   But currently, it still has the delay free issue which non per vm bo 
> doesn't has. Maybe it already has new fences append to this resv object 
> before copy.
>
> Hi Christian,
>  Do you have any suggestion about this? For per vm bo, it seems always 
> delay to free the ttm bo.
>
> Best wishes
> Emily Deng
>> -Original Message-
>> From: Zhou, David(ChunMing) 
>> Sent: Wednesday, July 10, 2019 9:28 PM
>> To: Deng, Emily ; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/ttm: Fix the memory delay free issue
>>
>> It doesn't make sense that freeing BO still uses per-vm resv.
>>
>> I remember when BO is in release list, its resv will be from per-vm resv 
>> copy.
>> Could you check it?
>>
>> -David
>>
>> 在 2019/7/10 17:29, Emily Deng 写道:
>>> For vulkan cts allocation test cases, they will create a series of
>>> bos, and then free them. As it has lots of alloction test cases with
>>> the same vm, as per vm bo feature enable, all of those bos' resv are
>>> the same. But the bo free is quite slow, as they use the same resv
>>> object, for every time, free a bo, it will check the resv whether
>>> signal, if it signal, then will free it. But as the test cases will
>>> continue to create bo, and the resv fence is increasing. So the free is more
>> slower than creating. It will cause memory exhausting.
>>> Method:
>>> When the resv signal, release all the bos which are use the same resv
>>> object.
>>>
>>> Signed-off-by: Emily Deng 
>>> ---
>>>drivers/gpu/drm/ttm/ttm_bo.c | 29 -
>>>1 file changed, 24 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>> b/drivers/gpu/drm/ttm/ttm_bo.c index f9a3d4c..57ec59b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -543,6 +543,7 @@ static int ttm_bo_cleanup_refs(struct
>> ttm_buffer_object *bo,
>>>{
>>> struct ttm_bo_global *glob = bo->bdev->glob;
>>> struct reservation_object *resv;
>>> +   struct ttm_buffer_object *resv_bo, *resv_bo_next;
>>> int ret;
>>>
>>> if (unlikely(list_empty(&bo->ddestroy)))
>>> @@ -566,10 +567,14 @@ static int ttm_bo_cleanup_refs(struct
>> ttm_buffer_object *bo,
>>>interruptible,
>>>30 * HZ);
>>>
>>> -   if (lret < 0)
>>> +   if (lret < 0) {
>>> +   kref_put(&bo->list_kref, ttm_bo_release_list);
>>> return lret;
>>> -   else if (lret == 0)
>>> +   }
>>> +   else if (lret == 0) {
>>> +   kref_put(&bo->list_kref, ttm_bo_release_list);
>>> return -EBUSY;
>>> +   }
>>>
>>> spin_lock(&glob->lru_lock);
>>> if (unlock_resv && !kcl_reservation_object_trylock(bo->resv))
>> { @@
>>> -582,6 +587,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object
>> *bo,
>>>  * here.
>>>  */
>>> spin_unlock(&glob->lru_lock);
>>> +   kref_put(&bo->list_kref, ttm_bo_release_list);
>>> return 0;
>>> }
>>> ret = 0;
>>> @@ -591,15 +597,29 @@ static int ttm_bo_cleanup_refs(struct
>> ttm_buffer_object *bo,
>>> if (unlock_resv)
>>> kcl_reservation_object_unlock(bo->resv);
>>> spin_unlock(&glob->lru_lock);
>>> +   kref_put(&bo->list_kref, ttm_bo_release_list);
>>> return ret;
>>> }
>>>
>>> ttm_bo_del_from_lru(bo);
>>> list_del_init(&bo->ddestroy);
>>> kref_put(&bo->list_kref, ttm_bo_ref_bug);
>>> -
>>> spin_unlock(&glob->lru_lock);
>>> ttm_bo_cleanup_memtype_use(bo);
>>> +   kref_put(&bo->list_kref, ttm_bo_release_list);
>>> +
>>> +   spin_lock(&glob->lru_lock);
>>> +   list_for_each_entry_safe(resv_bo, resv_bo_next, &bo->bdev-
>>> ddestroy, ddestroy) {
>>> +   if (resv_bo->resv == bo->resv) {
>>> +   ttm_bo_del_from_lru(resv_bo);
>>> +   list_del_init(&resv_bo->ddestroy);
>>> +   spin_unlock(&glob->lru_lock);
>>> +   ttm_bo_cleanup_memtype_use(resv_bo);
>>> +   kref_put(&resv_bo->list_kref, ttm_bo_release_list);
>>> +   spin_lock(&glob->lru_lock);
>>> +   }
>>> +   }
>>> +   spin_unlock(&glob->lru_lock);
>>>
>>> if (unlock_resv)
>>> kcl_r

Re: [PATCH 6/7] drm/amd/powerplay: Use proper enums in vega20_print_clk_levels

2019-07-15 Thread Arnd Bergmann
On Thu, Jul 4, 2019 at 7:52 AM Nathan Chancellor
 wrote:
>
> clang warns:
>
> drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:995:39: warning:
> implicit conversion from enumeration type 'PPCLK_e' to different
> enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now);
>   ~~^~~
> drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:1016:39: warning:
> implicit conversion from enumeration type 'PPCLK_e' to different
> enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> ret = smu_get_current_clk_freq(smu, PPCLK_FCLK, &now);
>   ~~^
> drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:1031:39: warning:
> implicit conversion from enumeration type 'PPCLK_e' to different
> enumeration type 'enum smu_clk_type' [-Wenum-conversion]
> ret = smu_get_current_clk_freq(smu, PPCLK_DCEFCLK, &now);
>   ~~^~~~
>
> The values are mapped one to one in vega20_get_smu_clk_index so just use
> the proper enums here.
>
> Fixes: 096761014227 ("drm/amd/powerplay: support sysfs to get socclk, fclk, 
> dcefclk")
> Link: https://github.com/ClangBuiltLinux/linux/issues/587
> Signed-off-by: Nathan Chancellor 
> ---

Adding Kevin Wang for further review, as he sent a related patch in
d36893362d22 ("drm/amd/powerplay: fix smu clock type change miss error")

I assume this one is still required as it triggers the same warning.
Kevin, can you have a look?

  Arnd

>  drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c 
> b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> index 0f14fe14ecd8..e62dd6919b24 100644
> --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
> @@ -992,7 +992,7 @@ static int vega20_print_clk_levels(struct smu_context 
> *smu,
> break;
>
> case SMU_SOCCLK:
> -   ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now);
> +   ret = smu_get_current_clk_freq(smu, SMU_SOCCLK, &now);
> if (ret) {
> pr_err("Attempt to get current socclk Failed!");
> return ret;
> @@ -1013,7 +1013,7 @@ static int vega20_print_clk_levels(struct smu_context 
> *smu,
> break;
>
> case SMU_FCLK:
> -   ret = smu_get_current_clk_freq(smu, PPCLK_FCLK, &now);
> +   ret = smu_get_current_clk_freq(smu, SMU_FCLK, &now);
> if (ret) {
> pr_err("Attempt to get current fclk Failed!");
> return ret;
> @@ -1028,7 +1028,7 @@ static int vega20_print_clk_levels(struct smu_context 
> *smu,
> break;
>
> case SMU_DCEFCLK:
> -   ret = smu_get_current_clk_freq(smu, PPCLK_DCEFCLK, &now);
> +   ret = smu_get_current_clk_freq(smu, SMU_DCEFCLK, &now);
> if (ret) {
> pr_err("Attempt to get current dcefclk Failed!");
> return ret;


Re: [PATCH] drm/amd/powerplay: work around enum conversion warnings

2019-07-15 Thread Arnd Bergmann
On Mon, Jul 8, 2019 at 6:05 PM Arnd Bergmann  wrote:
> On Mon, Jul 8, 2019 at 4:54 PM Nathan Chancellor
>  wrote:

> > On Mon, Jul 08, 2019 at 03:57:06PM +0200, Arnd Bergmann wrote:
> > > A couple of calls to smu_get_current_clk_freq() and smu_force_clk_levels()
> > > pass constants of the wrong type, leading to warnings with clang-8:
> > >
> > > drivers/gpu/drm/amd/amdgpu/../powerplay/vega20_ppt.c:995:39: error: 
> > > implicit conversion from enumeration type 'PPCLK_e' to different 
> > > enumeration type 'enum smu_clk_type' [-Werror,-Wenum-conversion]
> > > ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now);
> > >   ~~^~~
> > > drivers/gpu/drm/amd/amdgpu/../powerplay/inc/amdgpu_smu.h:775:82: note: 
> > > expanded from macro 'smu_get_current_clk_freq'
> > > ((smu)->funcs->get_current_clk_freq? 
> > > (smu)->funcs->get_current_clk_freq((smu), (clk_id), (value)) : 0)
> > >
> > > I could not figure out what the purpose is of mixing the types
> > > like this and if it is written like this intentionally.
> > > Assuming this is all correct, adding an explict case is an
> > > easy way to shut up the warnings.
> > >
> > > Fixes: bc0fcffd36ba ("drm/amd/powerplay: Unify smu handle task function 
> > > (v2)")
> > > Fixes: 096761014227 ("drm/amd/powerplay: support sysfs to get socclk, 
> > > fclk, dcefclk")
> > > Signed-off-by: Arnd Bergmann 
> >
> > I sent a series last week for all of the clang warnings that were added
> > in this driver recently.
> >
> > https://lore.kernel.org/lkml/20190704055217.45860-1-natechancel...@gmail.com/
> >
> > I think it is safe to use the CLK enums from the expected type (from
> > what I could see from going down the code flow rabbit hole).
> >
> > https://lore.kernel.org/lkml/20190704055217.45860-4-natechancel...@gmail.com/
> >
> > https://lore.kernel.org/lkml/20190704055217.45860-7-natechancel...@gmail.com/
>
> I tried that at first but concluded that it could not work because the 
> constants
> are different. Either it's currently broken and you patches fix the runtime
> behavior, or it's currently correct and your patches break it.

d36893362d22 ("drm/amd/powerplay: fix smu clock type change miss error")
was now applied and contains the same change as your first patch.

I assume the other one is still needed though.

   Arnd


<    1   2   3