[PATCH] drm/amdgpu: enable gfxoff for raven1 refresh

2019-12-12 Thread Changfeng.Zhu
From: changzhu 

When smu version is larger than 0x41e2b, it will load
raven_kicker_rlc.bin. To enable gfxoff for raven_kicker_rlc.bin, it
needs to avoid adev->pm.pp_feature &= ~PP_GFXOFF_MASK when it loads
raven_kicker_rlc.bin.

Change-Id: I4dffa1783c9ceb5d40df9756d821e2cd7feff84d
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ea58d0e5be4c..68409bb7c9e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1038,17 +1038,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
-   /* Disable GFXOFF on original raven.  There are combinations
-* of sbios and platforms that are not stable.
-*/
-   if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
-   adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-   else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
-&&((adev->gfx.rlc_fw_version != 106 &&
-adev->gfx.rlc_fw_version < 531) ||
-   (adev->gfx.rlc_fw_version == 53815) ||
-   (adev->gfx.rlc_feature_version < 1) ||
-   !adev->gfx.rlc.is_rlc_v2_1))
+   if (!(adev->rev_id >= 0x8 ||
+ adev->pdev->device == 0x15d8) &&
+   (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
+!adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore 
ucodes */
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()

2019-12-12 Thread Yong Zhao

The first one was already fixed and pushed a week ago.

Regards,

Yong

On 2019-12-12 7:25 p.m., Felix Kuehling wrote:
I agree with Christian's comments on patch 1. With those fixed, the 
series is


Reviewed-by: Felix Kuehling 

Regards,
  Felix

On 2019-12-02 20:42, Yong Zhao wrote:

Since Arcturus has its own function pointer, we can move the
Arcturus-specific logic there rather than leaving it entangled with
other GFX9 chips.

Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 20 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++--
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
  3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

index b6713e0ed1b2..3c119407dc34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -46,6 +46,8 @@
  #include "soc15.h"
  #include "soc15d.h"
  #include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
    #define HQD_N_REGS 56
  #define DUMP_REG(addr) do {    \
@@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev 
*kgd, void *mqd,

  return 0;
  }
  +static void kgd_set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,

+    uint64_t page_table_base)
+{
+    struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+    if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+    pr_err("trying to set page table base for wrong VMID %u\n",
+   vmid);
+    return;
+    }
+
+    mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+    gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
  .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
  .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
  .get_atc_vmid_pasid_mapping_info =
  kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
  .get_tile_config = kgd_gfx_v9_get_tile_config,
-    .set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+    .set_vm_context_page_table_base = 
kgd_set_vm_context_page_table_base,

  .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
  .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
  .get_hive_id = amdgpu_amdkfd_get_hive_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

index 6f1a4676ddde..e7861f0ef415 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,6 @@
  #include "soc15d.h"
  #include "mmhub_v1_0.h"
  #include "gfxhub_v1_0.h"
-#include "mmhub_v9_4.h"
      enum hqd_dequeue_request_type {
@@ -758,8 +757,8 @@ uint32_t 
kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,

  return 0;
  }
  -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,

-    uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd,

+    uint32_t vmid, uint64_t page_table_base)
  {
  struct amdgpu_device *adev = get_amdgpu_device(kgd);
  @@ -769,14 +768,7 @@ void 
kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, 
uint32_t vmi

  return;
  }
  -    /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
-    if (adev->asic_type == CHIP_ARCTURUS) {
-    mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
-    } else
-    mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+    mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
    gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h

index d9e9ad22b2bd..02b1426d17d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct 
kgd_dev *kgd,

    bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
  uint8_t vmid, uint16_t *p_pasid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, 
uint32_t vmid,

-    uint64_t page_table_base);
  int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
  int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t 
vmid);

  int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()

2019-12-12 Thread Felix Kuehling
I agree with Christian's comments on patch 1. With those fixed, the 
series is


Reviewed-by: Felix Kuehling 

Regards,
  Felix

On 2019-12-02 20:42, Yong Zhao wrote:

Since Arcturus has its own function pointer, we can move the
Arcturus-specific logic there rather than leaving it entangled with
other GFX9 chips.

Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 20 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++--
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
  3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index b6713e0ed1b2..3c119407dc34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -46,6 +46,8 @@
  #include "soc15.h"
  #include "soc15d.h"
  #include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
  
  #define HQD_N_REGS 56

  #define DUMP_REG(addr) do {   \
@@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
return 0;
  }
  
+static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,

+   uint64_t page_table_base)
+{
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+   pr_err("trying to set page table base for wrong VMID %u\n",
+  vmid);
+   return;
+   }
+
+   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
-   .set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+   .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 6f1a4676ddde..e7861f0ef415 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,6 @@
  #include "soc15d.h"
  #include "mmhub_v1_0.h"
  #include "gfxhub_v1_0.h"
-#include "mmhub_v9_4.h"
  
  
  enum hqd_dequeue_request_type {

@@ -758,8 +757,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev 
*kgd,
return 0;
  }
  
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,

-   uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t page_table_base)
  {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
  
@@ -769,14 +768,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi

return;
}
  
-	/* TODO: take advantage of per-process address space size. For

-* now, all processes share the same address space size, like
-* on GFX8 and older.
-*/
-   if (adev->asic_type == CHIP_ARCTURUS) {
-   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
-   } else
-   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
  
  	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index d9e9ad22b2bd..02b1426d17d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev 
*kgd,
  
  bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,

uint8_t vmid, uint16_t *p_pasid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
-   uint64_t page_table_base);
  int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
  int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
  int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,

___
amd-gfx mailing list

rocm 2.10: clinfo generates segfault in /opt/rocm/hsa/lib/libhsa-ext-image64.so.1:amd::GpuAgent::GetInfo()

2019-12-12 Thread John Utz
Greetings amd-gfx!

I beg your forgiveness in advance if you feel spammed a bit by me emailing 
amd-gfx after submitting 2 issues.

My issues have been entered as ROC 961 and 962.

By no means do i expect that any of you would have read those yet (or at all  )

 I wanted to reach out directly to you folks because based on the checkin mails 
i have been reading it seems pretty obvious that you all deal with this 
everyday.

I am working on standing up ROCm on an UDOO Bolt V8. This is a Raven Ridge 
board running ubuntu 18.04.3.
It's a Raven Ridge board so its a supported iGPU.

I followed the instructions, added the repo to my sources, and installed rocm via
apt-get.

I get a segfault in the early stages of running clinfo  ISSUE #962

I dont get line numbers in the stack trace because the binaries are stripped 
ISSUE #961

So how does one change something installed via apt get to build debug instead 
of release?

I am pretty sure something is trying to read a NULL inside of
amd::GpuAgent::GetInfo(), but I don't know where.

I didn't find an existing issue with this repro case; perhaps I didn't look in the
right place.

Kindly let me know if one already exists.

Also, please let me know if there is already a fix in the tree that has yet to 
be checked into a viewable branch.

Thank you in advance for any help you can provide!

John Utz
Pensar Development


John L. Utz III   |   Pensar 
Development   | 206.747.5497

This email message may contain confidential and proprietary information.  Any 
unauthorized use is prohibited.
EXPORT OR RE-EXPORT OF INFORMATION CONTAINED HEREIN MAY BE SUBJECT TO 
RESTRICTIONS AND REQUIREMENTS OF U.S. EXPORT LAWS AND REGULATIONS, AND MAY 
REQUIRE ADVANCE AUTHORIZATION FROM THE U.S. GOVERNMENT. If you are not the 
intended recipient, please contact the sender by reply email and destroy all 
copies of the original message.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/3] amdgpu: Prepare DCN floating point macros for generic

2019-12-12 Thread Alex Deucher
On Sat, Dec 7, 2019 at 5:47 PM Timothy Pearson
 wrote:
>
>  arch support
>
> Introduce DC_FP_START()/DC_FP_END() macros to help enable floating
> point kernel mode support across various architectures.
>
> Signed-off-by: Timothy Pearson 

Applied with a bit of tweaking.  Thanks!

Alex

> ---
>  .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c  | 24 +--
>  .../drm/amd/display/dc/dcn20/dcn20_resource.c |  4 ++--
>  .../drm/amd/display/dc/dcn21/dcn21_resource.c |  4 ++--
>  drivers/gpu/drm/amd/display/dc/os_types.h |  3 +++
>  4 files changed, 19 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 
> b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> index 9b2cb57bf2ba..cd5471263248 100644
> --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
> @@ -626,7 +626,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
>  {
> bool updated = false;
>
> -   kernel_fpu_begin();
> +   DC_FP_START();
> if ((int)(dc->dcn_soc->sr_exit_time * 1000) != 
> dc->debug.sr_exit_time_ns
> && dc->debug.sr_exit_time_ns) {
> updated = true;
> @@ -662,7 +662,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc)
> dc->dcn_soc->dram_clock_change_latency =
> dc->debug.dram_clock_change_latency_ns / 
> 1000.0;
> }
> -   kernel_fpu_end();
> +   DC_FP_END();
>
> return updated;
>  }
> @@ -742,7 +742,7 @@ bool dcn_validate_bandwidth(
> dcn_bw_sync_calcs_and_dml(dc);
>
> memset(v, 0, sizeof(*v));
> -   kernel_fpu_begin();
> +   DC_FP_START();
>
> v->sr_exit_time = dc->dcn_soc->sr_exit_time;
> v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time;
> @@ -1275,7 +1275,7 @@ bool dcn_validate_bandwidth(
> bw_limit = dc->dcn_soc->percent_disp_bw_limit * 
> v->fabric_and_dram_bandwidth_vmax0p9;
> bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit;
>
> -   kernel_fpu_end();
> +   DC_FP_END();
>
> PERFORMANCE_TRACE_END();
> BW_VAL_TRACE_FINISH();
> @@ -1443,7 +1443,7 @@ void dcn_bw_update_from_pplib(struct dc *dc)
> res = dm_pp_get_clock_levels_by_type_with_voltage(
> ctx, DM_PP_CLOCK_TYPE_FCLK, );
>
> -   kernel_fpu_begin();
> +   DC_FP_START();
>
> if (res)
> res = verify_clock_values();
> @@ -1463,12 +1463,12 @@ void dcn_bw_update_from_pplib(struct dc *dc)
> } else
> BREAK_TO_DEBUGGER();
>
> -   kernel_fpu_end();
> +   DC_FP_END();
>
> res = dm_pp_get_clock_levels_by_type_with_voltage(
> ctx, DM_PP_CLOCK_TYPE_DCFCLK, );
>
> -   kernel_fpu_begin();
> +   DC_FP_START();
>
> if (res)
> res = verify_clock_values();
> @@ -1481,7 +1481,7 @@ void dcn_bw_update_from_pplib(struct dc *dc)
> } else
> BREAK_TO_DEBUGGER();
>
> -   kernel_fpu_end();
> +   DC_FP_END();
>  }
>
>  void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
> @@ -1496,11 +1496,11 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
> if (!pp || !pp->set_wm_ranges)
> return;
>
> -   kernel_fpu_begin();
> +   DC_FP_START();
> min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 
> 100 / 32;
> min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000;
> socclk_khz = dc->dcn_soc->socclk * 1000;
> -   kernel_fpu_end();
> +   DC_FP_END();
>
> /* Now notify PPLib/SMU about which Watermarks sets they should select
>  * depending on DPM state they are in. And update BW MGR GFX Engine 
> and
> @@ -1551,7 +1551,7 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
>
>  void dcn_bw_sync_calcs_and_dml(struct dc *dc)
>  {
> -   kernel_fpu_begin();
> +   DC_FP_START();
> DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
> "sr_enter_plus_exit_time: %f ns\n"
> "urgent_latency: %f ns\n"
> @@ -1740,5 +1740,5 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
> dc->dml.ip.bug_forcing_LC_req_same_size_fixed =
> 
> dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == 
> dcn_bw_yes;
> dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency;
> -   kernel_fpu_end();
> +   DC_FP_END();
>  }
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
> b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> index 09793336d84f..74ad6f09c1d4 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
> @@ -3243,7 +3243,7 @@ void dcn20_update_bounding_box(struct dc *dc, struct 
> _vcs_dpi_soc_bounding_box_s
>
>  void 

Re: [PATCH][next] drm/amd/powerplay: fix various dereferences of a pointer before it is null checked

2019-12-12 Thread Alex Deucher
On Thu, Dec 12, 2019 at 1:17 PM Colin King  wrote:
>
> From: Colin Ian King 
>
> There are several occurrences of the pointer hwmgr being dereferenced
> before it is null checked.  Fix these by performing the dereference
> of hwmgr after it has been null checked.
>
> Addresses-Coverity: ("Dereference before null check")
> Fixes: 8497d2bcdee1 ("drm/amd/powerplay: enable pp one vf mode for vega10")
> Signed-off-by: Colin Ian King 

Applied.  thanks!

Alex

> ---
>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c |  6 +++---
>  drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c   | 15 +++
>  2 files changed, 6 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c 
> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> index 5087d6bdba60..322c2015d3a0 100644
> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> @@ -275,12 +275,12 @@ static int pp_dpm_load_fw(void *handle)
>  {
> struct pp_hwmgr *hwmgr = handle;
>
> -   if (!hwmgr->not_vf)
> -   return 0;
> -
> if (!hwmgr || !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->start_smu)
> return -EINVAL;
>
> +   if (!hwmgr->not_vf)
> +   return 0;
> +
> if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
> pr_err("fw load failed\n");
> return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c 
> b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
> index e2b82c902948..f48fdc7f0382 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
> @@ -282,10 +282,7 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
>
>  int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
>  {
> -   if (!hwmgr->not_vf)
> -   return 0;
> -
> -   if (!hwmgr || !hwmgr->pm_en)
> +   if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
> return 0;
>
> phm_stop_thermal_controller(hwmgr);
> @@ -305,10 +302,7 @@ int hwmgr_suspend(struct pp_hwmgr *hwmgr)
>  {
> int ret = 0;
>
> -   if (!hwmgr->not_vf)
> -   return 0;
> -
> -   if (!hwmgr || !hwmgr->pm_en)
> +   if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
> return 0;
>
> phm_disable_smc_firmware_ctf(hwmgr);
> @@ -327,13 +321,10 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr)
>  {
> int ret = 0;
>
> -   if (!hwmgr->not_vf)
> -   return 0;
> -
> if (!hwmgr)
> return -EINVAL;
>
> -   if (!hwmgr->pm_en)
> +   if (!hwmgr->not_vf || !hwmgr->pm_en)
> return 0;
>
> ret = phm_setup_asic(hwmgr);
> --
> 2.24.0
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu drm-fixes-5.5

2019-12-12 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.5.

The following changes since commit b53bd16fec3d52ff7be1648a9b0a747288f52cf8:

  Merge tag 'drm-misc-next-fixes-2019-12-04' of 
git://anongit.freedesktop.org/drm/drm-misc into drm-next (2019-12-05 11:11:11 
+1000)

are available in the Git repository at:

  git://people.freedesktop.org/~agd5f/linux tags/drm-fixes-5.5-2019-12-12

for you to fetch changes up to f271fe1856138d402e0438f994ccae95f9044b2c:

  drm/amdgpu: add invalidate semaphore limit for SRIOV in gmc10 (2019-12-12 
16:13:48 -0500)


drm-fixes-5.5-2019-12-12:

amdgpu:
- DC fixes for renoir
- Gfx8 fence flush align with mesa
- Power profile fix for arcturus
- Freesync fix
- DC I2c over aux fix
- DC aux defer fix
- GPU reset fix
- GPUVM invalidation semaphore fixes for PCO and SR-IOV
- Golden settings updates for gfx10


Alex Deucher (4):
  drm/amdgpu: add header line for power profile on Arcturus
  drm/amdgpu/display: add fallthrough comment
  drm/amdgpu: fix license on Kconfig and Makefiles
  Revert "drm/amdgpu: dont schedule jobs while in reset"

Amanda Liu (1):
  drm/amd/display: Fix screen tearing on vrr tests

Arnd Bergmann (2):
  drm/amd/display: fix undefined struct member reference
  drm/amd/display: include linux/slab.h where needed

Brandon Syu (1):
  drm/amd/display: fixed that I2C over AUX didn't read data issue

David Galiffi (1):
  drm/amd/display: Fixed kernel panic when booting with DP-to-HDMI dongle

Eric Yang (2):
  drm/amd/display: update sr and pstate latencies for Renoir
  drm/amd/display: update dispclk and dppclk vco frequency

George Shen (1):
  drm/amd/display: Increase the number of retries after AUX DEFER

Guchun Chen (1):
  drm/amdgpu: add check before enabling/disabling broadcast mode

Joseph Gravenor (3):
  drm/amd/display: fix DalDramClockChangeLatencyNs override
  drm/amd/display: populate bios integrated info for renoir
  drm/amd/display: have two different sr and pstate latency tables for 
renoir

Leo (Hanghong) Ma (1):
  drm/amd/display: Change the delay time before enabling FEC

Nikola Cornij (2):
  drm/amd/display: Map DSC resources 1-to-1 if numbers of OPPs and DSCs are 
equal
  drm/amd/display: Reset steer fifo before unblanking the stream

Pierre-Eric Pelloux-Prayer (1):
  drm/amdgpu: add cache flush workaround to gfx8 emit_fence

Tianci.Yin (4):
  drm/amdgpu/gfx10: update gfx golden settings
  drm/amdgpu/gfx10: update gfx golden settings for navi14
  drm/amdgpu/gfx10: update gfx golden settings
  drm/amdgpu/gfx10: update gfx golden settings for navi14

Yongqiang Sun (1):
  drm/amd/display: Compare clock state member to determine optimization.

changzhu (3):
  drm/amdgpu: avoid using invalidate semaphore for picasso
  drm/amdgpu: add invalidate semaphore limit for SRIOV and picasso in gmc9
  drm/amdgpu: add invalidate semaphore limit for SRIOV in gmc10

 drivers/gpu/drm/amd/acp/Kconfig|   2 +-
 drivers/gpu/drm/amd/amdgpu/Kconfig |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c|   5 +-
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c   |  38 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |   6 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  |  22 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  29 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  32 +++--
 drivers/gpu/drm/amd/amdkfd/Kconfig |   2 +-
 drivers/gpu/drm/amd/display/Kconfig|   2 +-
 drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c |   1 +
 .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c  | 134 -
 drivers/gpu/drm/amd/display/dc/core/dc_link.c  |   2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c  |   2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c   |   9 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_aux.c   |  33 +++--
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile  |   1 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c  |  15 ++-
 .../amd/display/dc/dcn20/dcn20_stream_encoder.c|  12 +-
 drivers/gpu/drm/amd/display/dc/dcn21/Makefile  |   1 +
 .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c  |  24 +++-
 drivers/gpu/drm/amd/display/dc/dsc/Makefile|   1 +
 drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h|   2 +
 .../gpu/drm/amd/display/include/i2caux_interface.h |   2 +-
 .../drm/amd/display/modules/freesync/freesync.c|  32 ++---
 .../gpu/drm/amd/display/modules/inc/mod_freesync.h |   1 -
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c   |   5 +
 27 files changed, 299 insertions(+), 118 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH][next] drm/amd/powerplay: fix various dereferences of a pointer before it is null checked

2019-12-12 Thread Colin King
From: Colin Ian King 

There are several occurrences of the pointer hwmgr being dereferenced
before it is null checked.  Fix these by performing the dereference
of hwmgr after it has been null checked.

Addresses-Coverity: ("Dereference before null check")
Fixes: 8497d2bcdee1 ("drm/amd/powerplay: enable pp one vf mode for vega10")
Signed-off-by: Colin Ian King 
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c |  6 +++---
 drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c   | 15 +++
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c 
b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 5087d6bdba60..322c2015d3a0 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -275,12 +275,12 @@ static int pp_dpm_load_fw(void *handle)
 {
struct pp_hwmgr *hwmgr = handle;
 
-   if (!hwmgr->not_vf)
-   return 0;
-
if (!hwmgr || !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->start_smu)
return -EINVAL;
 
+   if (!hwmgr->not_vf)
+   return 0;
+
if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
pr_err("fw load failed\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index e2b82c902948..f48fdc7f0382 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -282,10 +282,7 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
 
 int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
 {
-   if (!hwmgr->not_vf)
-   return 0;
-
-   if (!hwmgr || !hwmgr->pm_en)
+   if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
return 0;
 
phm_stop_thermal_controller(hwmgr);
@@ -305,10 +302,7 @@ int hwmgr_suspend(struct pp_hwmgr *hwmgr)
 {
int ret = 0;
 
-   if (!hwmgr->not_vf)
-   return 0;
-
-   if (!hwmgr || !hwmgr->pm_en)
+   if (!hwmgr || !hwmgr->pm_en || !hwmgr->not_vf)
return 0;
 
phm_disable_smc_firmware_ctf(hwmgr);
@@ -327,13 +321,10 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr)
 {
int ret = 0;
 
-   if (!hwmgr->not_vf)
-   return 0;
-
if (!hwmgr)
return -EINVAL;
 
-   if (!hwmgr->pm_en)
+   if (!hwmgr->not_vf || !hwmgr->pm_en)
return 0;
 
ret = phm_setup_asic(hwmgr);
-- 
2.24.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: queue kfd interrupt work to different CPU

2019-12-12 Thread Philip Yang
Because queue_work schedules the work on the same CPU on which the interrupt
handler is running, if there are many interrupts pending, it takes
longer for the work queue to start or, even worse, the system will hang.

v2: queue work to same NUMA node for better cache locality
v3: handle cpumask_next wraparound case

Signed-off-by: Philip Yang 
Reviewed-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..c6b6901bbda3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -822,6 +822,21 @@ static int kfd_resume(struct kfd_dev *kfd)
return err;
 }
 
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+   int cpu, new_cpu;
+
+   cpu = new_cpu = smp_processor_id();
+   do {
+   new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+   if (cpu_to_node(new_cpu) == numa_node_id())
+   break;
+   } while (cpu != new_cpu);
+
+   queue_work_on(new_cpu, wq, work);
+}
+
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -844,7 +859,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
   patched_ihre, _patched)
&& enqueue_ih_ring_entry(kfd,
 is_patched ? patched_ihre : ih_ring_entry))
-   queue_work(kfd->ih_wq, >interrupt_work);
+   kfd_queue_work(kfd->ih_wq, >interrupt_work);
 
spin_unlock_irqrestore(>interrupt_lock, flags);
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 6/6] drm/amd/display: add event type check before restart the authentication

2019-12-12 Thread Harry Wentland
Patches 1-3 are
Reviewed-by: Harry Wentland 

Patches 4-6 are
Acked-by: Harry Wentland 

Harry

On 2019-12-12 12:06 p.m., Bhawanpreet Lakha wrote:
> From: Xiaodong Yan 
> 
> [Why]
> Some combined docks will always trigger CP_IRQ but there's nothing the driver
> needs to take care of, but the CP_IRQ breaks the original hdcp state and
> triggers the driver to restart the authentication.
> 
> [How]
> Add the event type check before restarting the authentication or resending
> the stream management
> 
> Signed-off-by: Xiaodong Yan 
> Reviewed-by: Wenjing Liu 
> ---
>  .../gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c  | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c 
> b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
> index da190739a969..8cae3e3aacd5 100644
> --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
> +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
> @@ -630,7 +630,10 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
> mod_hdcp *hdcp,
>   break;
>   } else if (input->prepare_stream_manage != PASS ||
>   input->stream_manage_write != PASS) {
> - fail_and_restart_in_ms(0, , output);
> + if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK)
> + fail_and_restart_in_ms(0, , output);
> + else
> + increment_stay_counter(hdcp);
>   break;
>   }
>   callback_in_ms(100, output);
> @@ -655,10 +658,12 @@ enum mod_hdcp_status 
> mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp,
>*/
>   if (hdcp->auth.count.stream_management_retry_count > 
> 10) {
>   fail_and_restart_in_ms(0, , output);
> - } else {
> + } else if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK) 
> {
>   
> hdcp->auth.count.stream_management_retry_count++;
>   callback_in_ms(0, output);
>   set_state_id(hdcp, output, 
> D2_A9_SEND_STREAM_MANAGEMENT);
> + } else {
> + increment_stay_counter(hdcp);
>   }
>   break;
>   }
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: queue kfd interrupt work to different CPU

2019-12-12 Thread Philip Yang
Because queue_work schedules the work on the same CPU on which the interrupt
handler is running, if there are many interrupts pending, it takes
longer for the work queue to start or, even worse, the system will hang.

v2: queue work to same NUMA node for better cache locality

Signed-off-by: Philip Yang 
Reviewed-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..1dad76a3f3c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -822,6 +822,21 @@ static int kfd_resume(struct kfd_dev *kfd)
return err;
 }
 
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+   int cpu, new_cpu;
+
+   cpu = new_cpu = smp_processor_id();
+   do {
+   new_cpu = cpumask_next(new_cpu, cpu_online_mask);
+   if (cpu_to_node(new_cpu) == numa_node_id())
+   break;
+   } while (cpu != new_cpu);
+
+   queue_work_on(new_cpu, wq, work);
+}
+
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -844,7 +859,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
   patched_ihre, _patched)
&& enqueue_ih_ring_entry(kfd,
 is_patched ? patched_ihre : ih_ring_entry))
-   queue_work(kfd->ih_wq, >interrupt_work);
+   kfd_queue_work(kfd->ih_wq, >interrupt_work);
 
spin_unlock_irqrestore(>interrupt_lock, flags);
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/amdgpu/vcn1.0: use its own idle handler and begin use funcs

2019-12-12 Thread James Zhu

Reviewed-by: James Zhu  for the series.

On 2019-12-12 11:06 a.m., Leo Liu wrote:

Because VCN1.0 power management and DPG mode are managed together with
JPEG1.0 under both HW and FW, separate them from the general VCN code.
Also, the multiple-instance case is removed, since VCN1.0 HW has just
a single instance.

Signed-off-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c |  7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  3 +
  drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c  |  3 +-
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   | 88 -
  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h   |  2 +
  5 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 428cfd58b37d..e962c87d04cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -39,9 +39,6 @@
  #include "vcn/vcn_1_0_offset.h"
  #include "vcn/vcn_1_0_sh_mask.h"
  
-/* 1 second timeout */

-#define VCN_IDLE_TIMEOUT   msecs_to_jiffies(1000)
-
  /* Firmware Names */
  #define FIRMWARE_RAVEN"amdgpu/raven_vcn.bin"
  #define FIRMWARE_PICASSO  "amdgpu/picasso_vcn.bin"
@@ -71,7 +68,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
unsigned char fw_check;
int i, r;
  
-	INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);

+   /* For VCN2.0 and above */
+   if (adev->asic_type >= CHIP_ARCTURUS)
+   INIT_DELAYED_WORK(>vcn.idle_work, 
amdgpu_vcn_idle_work_handler);
  
  	switch (adev->asic_type) {

case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 402a5046b985..3484ead62046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -56,6 +56,9 @@
  #define VCN_VID_IP_ADDRESS_2_00x0
  #define VCN_AON_IP_ADDRESS_2_00x3
  
+/* 1 second timeout */

+#define VCN_IDLE_TIMEOUT   msecs_to_jiffies(1000)
+
  #define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel)  
\
({  WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask);   
\
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,   
\
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index a141408dfb23..0debfd9f428c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -25,6 +25,7 @@
  #include "amdgpu_jpeg.h"
  #include "soc15.h"
  #include "soc15d.h"
+#include "vcn_v1_0.h"
  
  #include "vcn/vcn_1_0_offset.h"

  #include "vcn/vcn_1_0_sh_mask.h"
@@ -561,7 +562,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v1_0_decode_ring_vm_funcs = {
.insert_start = jpeg_v1_0_decode_ring_insert_start,
.insert_end = jpeg_v1_0_decode_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
-   .begin_use = amdgpu_vcn_ring_begin_use,
+   .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 652cecc030b3..7395286540e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
  
  #include "amdgpu.h"

  #include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
  #include "soc15.h"
  #include "soc15d.h"
  #include "soc15_common.h"
@@ -51,6 +52,8 @@ static int vcn_v1_0_set_powergating_state(void *handle, enum 
amd_powergating_sta
  static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state);
  
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);

+
  /**
   * vcn_v1_0_early_init - set function pointers
   *
@@ -101,6 +104,7 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
  
+	INIT_DELAYED_WORK(>vcn.idle_work, vcn_v1_0_idle_work_handler);

r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
@@ -1758,6 +1762,86 @@ static int vcn_v1_0_set_powergating_state(void *handle,
return ret;
  }
  
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)

+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, vcn.idle_work.work);
+   unsigned int fences = 0, i;
+
+   for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+   fences += 
amdgpu_fence_count_emitted(>vcn.inst->ring_enc[i]);
+
+   if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+   struct dpg_pause_state new_state;
+
+   if (fences)
+   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.fw_based = 

[PATCH 3/6] drm/amd/display: Return correct Error code for validate h_prime

2019-12-12 Thread Bhawanpreet Lakha
[Why]
We are returning an incorrect error code for validate h_prime

[How]
Return the right Error code

Signed-off-by: Bhawanpreet Lakha 
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index 8f2e2fe50710..7911dc157d5a 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -511,7 +511,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct 
mod_hdcp *hdcp)
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
 
if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
-   return MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE;
+   return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
 
if (msg_out->process.msg1_status != 
TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
return MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/6] drm/amd/display: fix psp return condition for hdcp module

2019-12-12 Thread Bhawanpreet Lakha
We are returning SUCCESS when hdcp_status != Success. Fix it.

Signed-off-by: Bhawanpreet Lakha 
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index ef4eb55f4474..03476bb1367d 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -794,7 +794,7 @@ enum mod_hdcp_status 
mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp)
hdcp_cmd->cmd_id = 
TA_HDCP_COMMAND__HDCP2_PREPARE_PROCESS_AUTHENTICATION_MSG_V2;
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
 
-   return (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS) &&
+   return (hdcp_cmd->hdcp_status == TA_HDCP_STATUS__SUCCESS) &&
   (msg_out->process.msg1_status == 
TA_HDCP2_MSG_AUTHENTICATION_STATUS__SUCCESS)
   ? MOD_HDCP_STATUS_SUCCESS
   : MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/6] drm/amd/display: Fix hdcp1 create session

2019-12-12 Thread Bhawanpreet Lakha
[Why]
PSP needs a session ID to destroy a session. In the case where we fail
to create a session, we don't have a session ID.

[How]
Set the session ID before returning

Signed-off-by: Bhawanpreet Lakha 
---
 drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index 03476bb1367d..8f2e2fe50710 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -145,10 +145,11 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct 
mod_hdcp *hdcp)
 
psp_hdcp_invoke(psp, hdcp_cmd->cmd_id);
 
+   hdcp->auth.id = hdcp_cmd->out_msg.hdcp1_create_session.session_handle;
+
if (hdcp_cmd->hdcp_status != TA_HDCP_STATUS__SUCCESS)
return MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE;
 
-   hdcp->auth.id = hdcp_cmd->out_msg.hdcp1_create_session.session_handle;
hdcp->auth.msg.hdcp1.ainfo = 
hdcp_cmd->out_msg.hdcp1_create_session.ainfo_primary;
memcpy(hdcp->auth.msg.hdcp1.aksv, 
hdcp_cmd->out_msg.hdcp1_create_session.aksv_primary,
sizeof(hdcp->auth.msg.hdcp1.aksv));
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/6] drm/amd/display: rx_validation failed resume from sleep

2019-12-12 Thread Bhawanpreet Lakha
From: Jing Zhou 

[why]
Most DP/HDMI monitors need more time to respond to the rx_validation
request.

[how]
Add generic 1000ms delay.

Signed-off-by: Jing Zhou 
Reviewed-by: Wenjing Liu 
---
 .../display/modules/hdcp/hdcp1_transition.c   | 20 +++
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c
index 136b8011ff3f..21ebc62bb9d9 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c
@@ -67,11 +67,19 @@ enum mod_hdcp_status mod_hdcp_hdcp1_transition(struct 
mod_hdcp *hdcp,
break;
case H1_A2_COMPUTATIONS_A3_VALIDATE_RX_A6_TEST_FOR_REPEATER:
if (input->bcaps_read != PASS ||
-   input->r0p_read != PASS ||
-   input->rx_validation != PASS ||
-   (!conn->is_repeater && input->encryption != 
PASS)) {
+   input->r0p_read != PASS) {
+   fail_and_restart_in_ms(0, , output);
+   break;
+   } else if (input->rx_validation != PASS) {
/* 1A-06: consider invalid r0' a failure */
/* 1A-08: consider bksv listed in SRM a failure */
+   /*
+* some slow RX will fail rx validation when it is
+* not ready. give it more time to react before retry.
+*/
+   fail_and_restart_in_ms(1000, , output);
+   break;
+   } else if (!conn->is_repeater && input->encryption != PASS) {
fail_and_restart_in_ms(0, , output);
break;
}
@@ -212,7 +220,11 @@ enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct 
mod_hdcp *hdcp,
 * after 3 attempts.
 * 1A-08: consider bksv listed in SRM a failure
 */
-   fail_and_restart_in_ms(0, , output);
+   /*
+* some slow RX will fail rx validation when it 
is
+* not ready. give it more time to react before 
retry.
+*/
+   fail_and_restart_in_ms(1000, , output);
}
break;
} else if ((!conn->is_repeater && input->encryption != PASS) ||
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 0/6] HDCP fixes

2019-12-12 Thread Bhawanpreet Lakha
Summary of changes
*Fix return codes
*Fix some displays failing authentication

Bhawanpreet Lakha (3):
  drm/amd/display: fix psp return condition for hdcp module
  drm/amd/display: Fix hdcp1 create session
  drm/amd/display: Return correct Error code for validate h_prime

Jing Zhou (1):
  drm/amd/display: rx_validation failed resume from sleep

Michael Strauss (1):
  drm/amd/display: Add delay after h' watchdog timeout event

Xiaodong Yan (1):
  drm/amd/display: add event type check before restart the
authentication

 .../display/modules/hdcp/hdcp1_transition.c   | 20 +++
 .../display/modules/hdcp/hdcp2_transition.c   | 17 ++--
 .../drm/amd/display/modules/hdcp/hdcp_psp.c   |  7 ---
 3 files changed, 31 insertions(+), 13 deletions(-)

-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 6/6] drm/amd/display: add event type check before restart the authentication

2019-12-12 Thread Bhawanpreet Lakha
From: Xiaodong Yan 

[Why]
Some combined docks will always trigger CP_IRQ but there's nothing the driver
needs to take care of, but the CP_IRQ breaks the original hdcp state and
triggers the driver to restart the authentication.

[How]
Add the event type check before restarting the authentication or resending the
stream management

Signed-off-by: Xiaodong Yan 
Reviewed-by: Wenjing Liu 
---
 .../gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c  | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
index da190739a969..8cae3e3aacd5 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
@@ -630,7 +630,10 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
mod_hdcp *hdcp,
break;
} else if (input->prepare_stream_manage != PASS ||
input->stream_manage_write != PASS) {
-   fail_and_restart_in_ms(0, , output);
+   if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK)
+   fail_and_restart_in_ms(0, , output);
+   else
+   increment_stay_counter(hdcp);
break;
}
callback_in_ms(100, output);
@@ -655,10 +658,12 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
mod_hdcp *hdcp,
 */
if (hdcp->auth.count.stream_management_retry_count > 
10) {
fail_and_restart_in_ms(0, , output);
-   } else {
+   } else if (event_ctx->event == MOD_HDCP_EVENT_CALLBACK) 
{

hdcp->auth.count.stream_management_retry_count++;
callback_in_ms(0, output);
set_state_id(hdcp, output, 
D2_A9_SEND_STREAM_MANAGEMENT);
+   } else {
+   increment_stay_counter(hdcp);
}
break;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 5/6] drm/amd/display: Add delay after h' watchdog timeout event

2019-12-12 Thread Bhawanpreet Lakha
From: Michael Strauss 

[WHY]
Some monitors trigger HDCP2.x timeout after reinitializing (e.g. toggling HDR)
by taking longer than expected to return h' (h prime)
Previously the 200ms watchdog timer retry count would hit
MAX_NUM_OF_ATTEMPTS (4), causing fallback to HDCP1.x

[HOW]
Adding a 1s delay after an h' watchdog timeout provides enough time
for affected monitors to return h' in time without hitting MAX_NUM_OF_ATTEMPTS

Signed-off-by: Michael Strauss 
Reviewed-by: Wenjing Liu 
---
 .../gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c   | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c 
b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
index e8043c903a84..da190739a969 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
@@ -114,7 +114,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct 
mod_hdcp *hdcp,
if (event_ctx->event ==
MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
/* 1A-11-3: consider h' timeout a failure */
-   fail_and_restart_in_ms(0, , output);
+   fail_and_restart_in_ms(1000, , output);
} else {
/* continue h' polling */
callback_in_ms(100, output);
@@ -166,7 +166,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct 
mod_hdcp *hdcp,
if (event_ctx->event ==
MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) {
/* 1A-11-2: consider h' timeout a failure */
-   fail_and_restart_in_ms(0, , output);
+   fail_and_restart_in_ms(1000, , output);
} else {
/* continue h' polling */
callback_in_ms(20, output);
@@ -439,7 +439,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
mod_hdcp *hdcp,
if (event_ctx->event ==
MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
/* 1A-10-3: consider h' timeout a failure */
-   fail_and_restart_in_ms(0, , output);
+   fail_and_restart_in_ms(1000, , output);
else
increment_stay_counter(hdcp);
break;
@@ -484,7 +484,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct 
mod_hdcp *hdcp,
if (event_ctx->event ==
MOD_HDCP_EVENT_WATCHDOG_TIMEOUT)
/* 1A-10-2: consider h' timeout a failure */
-   fail_and_restart_in_ms(0, , output);
+   fail_and_restart_in_ms(1000, , output);
else
increment_stay_counter(hdcp);
break;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add JPEG check to VCN idle handler and begin use

2019-12-12 Thread Leo Liu


On 2019-12-12 3:18 a.m., Christian König wrote:

Am 11.12.19 um 20:48 schrieb Leo Liu:

Since it's only needed with VCN1.0, where the HW does not have its
own JPEG HW IP block


Wouldn't it be simpler/cleaner to just define a 
vcn_v1_0_ring_begin_use() and vcn_v1_0_idle_work_handler() instead?


Yeah, this way should be cleaner, even though the changes got bigger, 
the new set will be sent shortly.


Thanks,

Leo





Regards,
Christian.



Signed-off-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 29 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

index 428cfd58b37d..95ac721f2de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -186,6 +186,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
  }
  }
  +    adev->vcn.has_jpeg_block = 
(amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?

+    true : false;
+
  return 0;
  }
  @@ -306,15 +309,17 @@ static void 
amdgpu_vcn_idle_work_handler(struct work_struct *work)

  else
  new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  -    if 
(amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))

-    new_state.jpeg = VCN_DPG_STATE__PAUSE;
-    else
-    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
+    if (!adev->vcn.has_jpeg_block) {
+    if 
(amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))

+    new_state.jpeg = VCN_DPG_STATE__PAUSE;
+    else
+    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    }
  adev->vcn.pause_dpg_mode(adev, _state);
  }
  -    fence[j] += 
amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);

+    if (!adev->vcn.has_jpeg_block)
+    fence[j] += 
amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);
  fence[j] += 
amdgpu_fence_count_emitted(>vcn.inst[j].ring_dec);

  fences += fence[j];
  }
@@ -358,14 +363,16 @@ void amdgpu_vcn_ring_begin_use(struct 
amdgpu_ring *ring)

  else
  new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  -    if 
(amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))

-    new_state.jpeg = VCN_DPG_STATE__PAUSE;
-    else
-    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    if (!adev->vcn.has_jpeg_block) {
+    if 
(amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))

+    new_state.jpeg = VCN_DPG_STATE__PAUSE;
+    else
+    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    }
    if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
  new_state.fw_based = VCN_DPG_STATE__PAUSE;
-    else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+    else if (!adev->vcn.has_jpeg_block && ring->funcs->type == 
AMDGPU_RING_TYPE_VCN_JPEG)

  new_state.jpeg = VCN_DPG_STATE__PAUSE;
    adev->vcn.pause_dpg_mode(adev, _state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 402a5046b985..9a2381d006c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -192,6 +192,8 @@ struct amdgpu_vcn {
  unsigned    harvest_config;
  int (*pause_dpg_mode)(struct amdgpu_device *adev,
  struct dpg_pause_state *new_state);
+
+    bool has_jpeg_block;
  };
    int amdgpu_vcn_sw_init(struct amdgpu_device *adev);



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu/vcn: remove JPEG related code from idle handler and begin use

2019-12-12 Thread Leo Liu
For VCN2.0 and above, VCN has been separated from JPEG

Signed-off-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 28 +
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index e962c87d04cf..2ff04d0047ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -293,6 +293,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct 
*work)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
fence[j] += 
amdgpu_fence_count_emitted(>vcn.inst[j].ring_enc[i]);
}
@@ -305,26 +306,17 @@ static void amdgpu_vcn_idle_work_handler(struct 
work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-   if 
(amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))
-   new_state.jpeg = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
adev->vcn.pause_dpg_mode(adev, _state);
}
 
-   fence[j] += 
amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);
fence[j] += 
amdgpu_fence_count_emitted(>vcn.inst[j].ring_dec);
fences += fence[j];
}
 
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
-   if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
-   amdgpu_dpm_enable_uvd(adev, false);
-   else
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  
AMD_PG_STATE_GATE);
+   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(>vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -337,11 +329,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
-   if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
-   amdgpu_dpm_enable_uvd(adev, true);
-   else
-   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
-  
AMD_PG_STATE_UNGATE);
+   amdgpu_device_ip_set_powergating_state(adev, 
AMD_IP_BLOCK_TYPE_VCN,
+  AMD_PG_STATE_UNGATE);
}
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
@@ -357,15 +346,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-   if 
(amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))
-   new_state.jpeg = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
-   else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
-   new_state.jpeg = VCN_DPG_STATE__PAUSE;
 
adev->vcn.pause_dpg_mode(adev, _state);
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu/vcn1.0: use its own idle handler and begin use funcs

2019-12-12 Thread Leo Liu
Because VCN1.0 power management and DPG mode are managed together with
JPEG1.0 under both HW and FW, separate them from the general VCN code.
Also, the multiple-instance case is removed, since VCN1.0 HW has just
a single instance.

Signed-off-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c |  7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  3 +
 drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c  |  3 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   | 88 -
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h   |  2 +
 5 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 428cfd58b37d..e962c87d04cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -39,9 +39,6 @@
 #include "vcn/vcn_1_0_offset.h"
 #include "vcn/vcn_1_0_sh_mask.h"
 
-/* 1 second timeout */
-#define VCN_IDLE_TIMEOUT   msecs_to_jiffies(1000)
-
 /* Firmware Names */
 #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
 #define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
@@ -71,7 +68,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
unsigned char fw_check;
int i, r;
 
-   INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler);
+   /* For VCN2.0 and above */
+   if (adev->asic_type >= CHIP_ARCTURUS)
+   INIT_DELAYED_WORK(>vcn.idle_work, 
amdgpu_vcn_idle_work_handler);
 
switch (adev->asic_type) {
case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 402a5046b985..3484ead62046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -56,6 +56,9 @@
 #define VCN_VID_IP_ADDRESS_2_0 0x0
 #define VCN_AON_IP_ADDRESS_2_0 0x3
 
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT   msecs_to_jiffies(1000)
+
 #define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel)   
\
({  WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask);   
\
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,   
\
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index a141408dfb23..0debfd9f428c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -25,6 +25,7 @@
 #include "amdgpu_jpeg.h"
 #include "soc15.h"
 #include "soc15d.h"
+#include "vcn_v1_0.h"
 
 #include "vcn/vcn_1_0_offset.h"
 #include "vcn/vcn_1_0_sh_mask.h"
@@ -561,7 +562,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v1_0_decode_ring_vm_funcs = {
.insert_start = jpeg_v1_0_decode_ring_insert_start,
.insert_end = jpeg_v1_0_decode_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
-   .begin_use = amdgpu_vcn_ring_begin_use,
+   .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 652cecc030b3..7395286540e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
 #include "soc15.h"
 #include "soc15d.h"
 #include "soc15_common.h"
@@ -51,6 +52,8 @@ static int vcn_v1_0_set_powergating_state(void *handle, enum 
amd_powergating_sta
 static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
struct dpg_pause_state *new_state);
 
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);
+
 /**
  * vcn_v1_0_early_init - set function pointers
  *
@@ -101,6 +104,7 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
 
+   INIT_DELAYED_WORK(>vcn.idle_work, vcn_v1_0_idle_work_handler);
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
@@ -1758,6 +1762,86 @@ static int vcn_v1_0_set_powergating_state(void *handle,
return ret;
 }
 
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)
+{
+   struct amdgpu_device *adev =
+   container_of(work, struct amdgpu_device, vcn.idle_work.work);
+   unsigned int fences = 0, i;
+
+   for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+   fences += 
amdgpu_fence_count_emitted(>vcn.inst->ring_enc[i]);
+
+   if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+   struct dpg_pause_state new_state;
+
+   if (fences)
+   new_state.fw_based = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+   if (amdgpu_fence_count_emitted(>jpeg.inst->ring_dec))
+   new_state.jpeg = 

Re: [PATCH] drm/amdgpu: add JPEG check to VCN idle handler and begin use

2019-12-12 Thread Christian König

Am 12.12.19 um 16:57 schrieb Leo Liu:


On 2019-12-12 3:18 a.m., Christian König wrote:

Am 11.12.19 um 20:48 schrieb Leo Liu:

Since it's only needed with VCN1.0, where the HW does not have its
own JPEG HW IP block


Wouldn't it be simpler/cleaner to just define a 
vcn_v1_0_ring_begin_use() and vcn_v1_0_idle_work_handler() instead?


Yeah, this way should be cleaner, even though the changes got bigger, 
the new set will be sent shortly.


Keep in mind that you don't need to fully clone the code.

You probably can still call the common VCN helper code quite a bit.

Christian.



Thanks,

Leo





Regards,
Christian.



Signed-off-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 29 
+++--

  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

index 428cfd58b37d..95ac721f2de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -186,6 +186,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
  }
  }
  +    adev->vcn.has_jpeg_block = 
(amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?

+    true : false;
+
  return 0;
  }
  @@ -306,15 +309,17 @@ static void 
amdgpu_vcn_idle_work_handler(struct work_struct *work)

  else
  new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  -    if 
(amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))

-    new_state.jpeg = VCN_DPG_STATE__PAUSE;
-    else
-    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
+    if (!adev->vcn.has_jpeg_block) {
+    if 
(amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))

+    new_state.jpeg = VCN_DPG_STATE__PAUSE;
+    else
+    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    }
  adev->vcn.pause_dpg_mode(adev, _state);
  }
  -    fence[j] += 
amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);

+    if (!adev->vcn.has_jpeg_block)
+    fence[j] += 
amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);
  fence[j] += 
amdgpu_fence_count_emitted(>vcn.inst[j].ring_dec);

  fences += fence[j];
  }
@@ -358,14 +363,16 @@ void amdgpu_vcn_ring_begin_use(struct 
amdgpu_ring *ring)

  else
  new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  -    if 
(amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))

-    new_state.jpeg = VCN_DPG_STATE__PAUSE;
-    else
-    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    if (!adev->vcn.has_jpeg_block) {
+    if 
(amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))

+    new_state.jpeg = VCN_DPG_STATE__PAUSE;
+    else
+    new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+    }
    if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
  new_state.fw_based = VCN_DPG_STATE__PAUSE;
-    else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+    else if (!adev->vcn.has_jpeg_block && ring->funcs->type == 
AMDGPU_RING_TYPE_VCN_JPEG)

  new_state.jpeg = VCN_DPG_STATE__PAUSE;
    adev->vcn.pause_dpg_mode(adev, _state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

index 402a5046b985..9a2381d006c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -192,6 +192,8 @@ struct amdgpu_vcn {
  unsigned    harvest_config;
  int (*pause_dpg_mode)(struct amdgpu_device *adev,
  struct dpg_pause_state *new_state);
+
+    bool has_jpeg_block;
  };
    int amdgpu_vcn_sw_init(struct amdgpu_device *adev);




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RESEND PATCH 4/5] Subject: drm/amdgpu: Redo XGMI reset synchronization.

2019-12-12 Thread Andrey Grodzovsky


On 12/11/19 11:05 PM, Ma, Le wrote:


[AMD Official Use Only - Internal Distribution Only]

-Original Message-
From: Andrey Grodzovsky 
Sent: Thursday, December 12, 2019 4:39 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Ma, Le 
; Zhang, Hawking ; Quan, Evan 
; Grodzovsky, Andrey 
Subject: [RESEND PATCH 4/5] Subject: drm/amdgpu: Redo XGMI reset 
synchronization.


Use task barrier in XGMI hive to synchronize ASIC resets across 
devices in XGMI hive.


Signed-off-by: Andrey Grodzovsky >


---

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 
+-


1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c


index 1d19edfa..e4089a0 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

@@ -67,6 +67,7 @@

#include "amdgpu_tmz.h"

 #include 

+#include <drm/task_barrier.h>

 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");

MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");

@@ -2663,14 +2664,43 @@ static void 
amdgpu_device_xgmi_reset_func(struct work_struct *__work)  {


   struct amdgpu_device *adev =

container_of(__work, struct amdgpu_device, xgmi_reset_work);

+  struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);

-   if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)

- adev->asic_reset_res = (adev->in_baco == false) ?

- amdgpu_device_baco_enter(adev->ddev) :

- amdgpu_device_baco_exit(adev->ddev);

-   else

- adev->asic_reset_res = amdgpu_asic_reset(adev);

+  /*

+  * Use task barrier to synchronize all xgmi reset works 
across the


+  * hive.

+  * task_barrier_enter and task_barrier_exit will block 
until all the


+  * threads running the xgmi reset works reach those points. 
I assume


+  * guarantee of progress here for all the threads as the 
workqueue code


+  * creates new worker threads as needed by amount of work 
items in queue


+  * (see worker_thread) and also each thread sleeps in the 
barrier and by


+  * this yielding the CPU for other work threads to make 
progress.


+  */

[Le]: This comment can be adjusted, since we switch to 
system_unbound_wq in patch #5.


+  if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {

+

+  if (hive)

+ task_barrier_enter(&hive->tb);

[Le]: The multiple hive condition can be checked only once and moved 
to the location right after the assignment.




Not sure what you meant here, but note that within 
amdgpu_device_xgmi_reset_func it's a bug for amdgpu_get_xgmi_hive to 
return NULL, so I think it is better to add WARN_ON(!hive,"...") and 
return right at the beginning of the function if indeed hive == NULL.


Andrey



+

+ adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);

+

+  if (adev->asic_reset_res)

+  goto fail;

+

+  if (hive)

+ task_barrier_exit(&hive->tb);

[Le]: Same as above.

+

+ adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);

+

+  if (adev->asic_reset_res)

+  goto fail;

+  } else {

+  if (hive)

+ task_barrier_full(&hive->tb);

[Le]: Same as above.

With above addressed, Reviewed-by: Le Ma >


Regards,

Ma Le

+

+ adev->asic_reset_res =  amdgpu_asic_reset(adev);

+  }

+fail:

   if (adev->asic_reset_res)

   DRM_WARN("ASIC reset failed with error, %d for 
drm dev, %s",


 adev->asic_reset_res, adev->ddev->unique);

--

2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/5] drm/amdgpu: immedially invalidate PTEs

2019-12-12 Thread Christian König

Hi Philip,

that is an expected result. You can only invalidate page tables without 
holding the reservation lock.


What you are doing here is adding a new mapping, and that needs to allocate 
new page tables, so it won't work like this.


Regards,
Christian.

Am 12.12.19 um 15:38 schrieb Philip Yang:

Hi Christian,

FYI, after removing amdgpu_bo_reserve(root, true) before calling 
amdgpu_vm_bo_update_mapping, I got this warning backtrace:


    [  182.390072] WARNING: CPU: 12 PID: 4376 at

/home/yangp/git/compute_staging/kernel/drivers/gpu/drm/ttm/ttm_bo.c:1229
    ttm_bo_validate+0x14d/0x1b0 [ttm]
    [  182.390085] Modules linked in: fuse ip6table_filter ip6_tables
    iptable_filter amdgpu amd_iommu_v2 gpu_sched ast drm_vram_helper
    drm_ttm_helper ttm k10temp ip_tables x_tables i2c_piix4
    [  182.390123] CPU: 12 PID: 4376 Comm: kfdtest Tainted: G    W
    5.4.0-rc7-kfd-yangp #1
    [  182.390133] Hardware name: GIGABYTE MZ01-CE0-00/MZ01-CE0-00, 
BIOS F12

    08/05/2019
    [  182.390146] RIP: 0010:ttm_bo_validate+0x14d/0x1b0 [ttm]
    [  182.390153] Code: 40 ff 52 18 8b 44 24 04 e9 4f ff ff ff 48 8b 
87 20
    01 00 00 be ff ff ff ff 48 8d 78 60 e8 5b 3a e1 d4 85 c0 0f 85 e7 
fe ff
    ff <0f> 0b e9 e0 fe ff ff be 01 00 00 00 48 89 df e8 2f c4 ff ff 
e9 19

    [  182.390161] RSP: 0018:ab7a032f3990 EFLAGS: 00010246
    [  182.390166] RAX:  RBX: 943c59b37850 RCX:
    943c59b35000
    [  182.390171] RDX: 943c539daf00 RSI: 943c56fb31d8 RDI:
    943c539db790
    [  182.390178] RBP: 943c59b37830 R08: 0200 R09:
    
    [  182.390184] R10:  R11: 001fee0e R12:
    ab7a032f3a50
    [  182.390194] R13: 0200 R14: 943c59b37800 R15:
    
    [  182.390197] FS:  7f0d27f41780() GS:943c9e90()
    knlGS:
    [  182.390203] CS:  0010 DS:  ES:  CR0: 80050033
    [  182.390209] CR2: 7fba7a1010a0 CR3: 0007f2624000 CR4:
    003406e0
    [  182.390212] Call Trace:
    [  182.390219]  ? rcu_read_lock_sched_held+0x52/0x80
    [  182.390223]  ? _raw_spin_unlock+0x24/0x30
    [  182.390267]  ? amdgpu_bo_do_create+0x4d1/0x5d0 [amdgpu]
    [  182.390319]  amdgpu_vm_clear_bo+0x13d/0x3a0 [amdgpu]
    [  182.390371]  ? amdgpu_vm_num_entries+0x1e/0x70 [amdgpu]
    [  182.390424]  amdgpu_vm_update_ptes+0x561/0x5d0 [amdgpu]
    [  182.390480]  amdgpu_vm_bo_update_mapping+0xfd/0x130 [amdgpu]
    [  182.390530]  amdgpu_vm_bo_split_mapping+0x1ea/0x2c0 [amdgpu]
    [  182.390591]  svm_range_map_to_gpus+0x160/0x310 [amdgpu]
    [  182.390650]  kfd_register_svm+0xb8/0x2b0 [amdgpu]
    [  182.390708]  kfd_ioctl_register_svm+0xe8/0x110 [amdgpu]
    [  182.390765]  kfd_ioctl+0x232/0x3d0 [amdgpu]
    [  182.390823]  ? kfd_ioctl_get_process_apertures_new+0x310/0x310
    [amdgpu]
    [  182.390838]  ? selinux_file_ioctl+0x153/0x210
    [  182.390845]  do_vfs_ioctl+0xa2/0x6e0
    [  182.390854]  ksys_ioctl+0x70/0x80
    [  182.390862]  __x64_sys_ioctl+0x16/0x20
    [  182.390869]  do_syscall_64+0x4a/0x1b0
    [  182.390879]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

Philip

On 2019-12-12 3:51 a.m., Christian König wrote:

Hi Felix,

yeah, I've also found a corner case which would raise a warning now.

Need to rework how dependencies for the PTE update are generated.

Going to take care of this in the next few days,
Christian.

Am 12.12.19 um 01:20 schrieb Felix Kuehling:

Hi Christian,

Alex started trying to invalidate PTEs in the MMU notifiers and 
we're finding that we still need to reserve the VM reservation for 
amdgpu_sync_resv in amdgpu_vm_sdma_prepare. Is that sync_resv still 
needed now, given that VM fences aren't in that reservation object 
any more?


Regards,
  Felix

On 2019-12-05 5:39, Christian König wrote:

When a BO is evicted, immediately invalidate the mapped PTEs.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 -
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 839d6df394fc..e578113bfd55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2565,6 +2565,7 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

   struct amdgpu_bo *bo, bool evicted)
  {
  struct amdgpu_vm_bo_base *bo_base;
+    int r;
    /* shadow bo doesn't have bo base, its validation needs its 
parent */

  if (bo->parent && bo->parent->shadow == bo)
@@ -2572,8 +2573,22 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

    for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
  struct amdgpu_vm *vm = bo_base->vm;
+    struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+    if (bo->tbo.type != ttm_bo_type_kernel) {
+    struct amdgpu_bo_va *bo_va;
+
+    bo_va = 

Re: [PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Andrey Grodzovsky


On 12/12/19 10:09 AM, Christian König wrote:

Am 12.12.19 um 15:50 schrieb Grodzovsky, Andrey:

[AMD Official Use Only - Internal Distribution Only]

__
From: Christian König 
Sent: 12 December 2019 03:31
To: Alex Deucher; Grodzovsky, Andrey
Cc: Deucher, Alexander; Ma, Le; Quan, Evan; amd-gfx list; Zhang, Hawking
Subject: Re: [PATCH 2/5] drm: Add Reusable task barrier.

Am 12.12.19 um 09:24 schrieb Christian König:

Am 11.12.19 um 21:19 schrieb Alex Deucher:

On Wed, Dec 11, 2019 at 3:07 PM Andrey Grodzovsky
 wrote:

It is used to synchronize N threads at a rendezvous point before
execution
of critical code that has to be started by all the threads at
approximately
the same time.

Signed-off-by: Andrey Grodzovsky 

You should resend to dri-devel since this task barrier is being added
to common code.

In addition to that, this whole thing has the potential to raise lockdep
warnings and, if I'm not completely mistaken, doesn't even work 
correctly.

Can you give me a potential lockdep scenario ?


Lockdep usually complains if a lock is released from another thread 
than where it was locked from.


In the code you let each thread do a down() and then the last one does 
multiple up() calls.


But I think that is only illegal for mutexes, but legal for semaphores.

Christian.



Yes, from what I've read for semaphores it's ok to release (up) from a 
thread which didn't acquire (down)


Andrey






Andrey


See Linux kernel semaphores don't allow negative values (the count
field in struct semaphore is unsigned).

Ok, forget what I wrote. That indeed seems to be supported, some
other drivers are already using semaphores the same way.

Regards,
Christian.


Regards,
Christian.


Alex


---
   include/drm/task_barrier.h | 106
+
   1 file changed, 106 insertions(+)
   create mode 100644 include/drm/task_barrier.h

diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h
new file mode 100644
index 000..81fb0f7
--- /dev/null
+++ b/include/drm/task_barrier.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files (the
"Software"),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to
whom the
+ * Software is furnished to do so, subject to the following
conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+/*
+ * Reusable 2 PHASE task barrier (rendezvous point) implementation
for N tasks.
+ * Based on the Little Book of Semaphores -
https://greenteapress.com/wp/semaphores/


+ */
+
+
+
+#ifndef DRM_TASK_BARRIER_H_
+#define DRM_TASK_BARRIER_H_
+
+/*
+ * Represents an instance of a task barrier.
+ */
+struct task_barrier {
+   unsigned int n;
+   atomic_t count;
+   struct semaphore enter_turnstile;
+   struct semaphore exit_turnstile;
+};
+
+static inline void task_barrier_signal_turnstile(struct semaphore
*turnstile,
+    unsigned int n)
+{
+   int i;
+
+   for (i = 0 ; i < n; i++)
+   up(turnstile);
+}
+
+static inline void task_barrier_init(struct task_barrier *tb)
+{
+   tb->n = 0;
+   atomic_set(&tb->count, 0);
+   sema_init(&tb->enter_turnstile, 0);
+   sema_init(&tb->exit_turnstile, 0);
+}
+
+static inline void task_barrier_add_task(struct task_barrier *tb)
+{
+   tb->n++;
+}
+
+static inline void task_barrier_rem_task(struct task_barrier *tb)
+{
+   tb->n--;
+}
+
+/*
+ * Lines up all the threads BEFORE the critical point.
+ *
+ * When all thread passed this code the entry barrier is back to
locked state.
+ */
+static inline void task_barrier_enter(struct task_barrier *tb)
+{
+   if (atomic_inc_return(&tb->count) == tb->n)
+ task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
+
+   down(&tb->enter_turnstile);
+}
+
+/*
+ * 

Re: [PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Christian König

Am 12.12.19 um 15:50 schrieb Grodzovsky, Andrey:

[AMD Official Use Only - Internal Distribution Only]

__
From: Christian König 
Sent: 12 December 2019 03:31
To: Alex Deucher; Grodzovsky, Andrey
Cc: Deucher, Alexander; Ma, Le; Quan, Evan; amd-gfx list; Zhang, Hawking
Subject: Re: [PATCH 2/5] drm: Add Reusable task barrier.

Am 12.12.19 um 09:24 schrieb Christian König:

Am 11.12.19 um 21:19 schrieb Alex Deucher:

On Wed, Dec 11, 2019 at 3:07 PM Andrey Grodzovsky
 wrote:

It is used to synchronize N threads at a rendezvous point before
execution
of critical code that has to be started by all the threads at
approximately
the same time.

Signed-off-by: Andrey Grodzovsky 

You should resend to dri-devel since this task barrier is being added
to common code.

In addition to that, this whole thing has the potential to raise lockdep
warnings and, if I'm not completely mistaken, doesn't even work correctly.

Can you give me a potential lockdep scenario ?


Lockdep usually complains if a lock is released from another thread than 
where it was locked from.


In the code you let each thread do a down() and then the last one does 
multiple up() calls.


But I think that is only illegal for mutexes, but legal for semaphores.

Christian.



Andrey


See Linux kernel semaphores don't allow negative values (the count
field in struct semaphore is unsigned).

Ok, forget what I wrote. That indeed seems to be supported, some
other drivers are already using semaphores the same way.

Regards,
Christian.


Regards,
Christian.


Alex


---
   include/drm/task_barrier.h | 106
+
   1 file changed, 106 insertions(+)
   create mode 100644 include/drm/task_barrier.h

diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h
new file mode 100644
index 000..81fb0f7
--- /dev/null
+++ b/include/drm/task_barrier.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files (the
"Software"),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to
whom the
+ * Software is furnished to do so, subject to the following
conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+/*
+ * Reusable 2 PHASE task barrier (rendezvous point) implementation
for N tasks.
+ * Based on the Little Book of Semaphores -
https://greenteapress.com/wp/semaphores/
+ */
+
+
+
+#ifndef DRM_TASK_BARRIER_H_
+#define DRM_TASK_BARRIER_H_
+
+/*
+ * Represents an instance of a task barrier.
+ */
+struct task_barrier {
+   unsigned int n;
+   atomic_t count;
+   struct semaphore enter_turnstile;
+   struct semaphore exit_turnstile;
+};
+
+static inline void task_barrier_signal_turnstile(struct semaphore
*turnstile,
+unsigned int n)
+{
+   int i;
+
+   for (i = 0 ; i < n; i++)
+   up(turnstile);
+}
+
+static inline void task_barrier_init(struct task_barrier *tb)
+{
+   tb->n = 0;
+   atomic_set(&tb->count, 0);
+   sema_init(&tb->enter_turnstile, 0);
+   sema_init(&tb->exit_turnstile, 0);
+}
+
+static inline void task_barrier_add_task(struct task_barrier *tb)
+{
+   tb->n++;
+}
+
+static inline void task_barrier_rem_task(struct task_barrier *tb)
+{
+   tb->n--;
+}
+
+/*
+ * Lines up all the threads BEFORE the critical point.
+ *
+ * When all thread passed this code the entry barrier is back to
locked state.
+ */
+static inline void task_barrier_enter(struct task_barrier *tb)
+{
+   if (atomic_inc_return(&tb->count) == tb->n)
+ task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
+
+   down(&tb->enter_turnstile);
+}
+
+/*
+ * Lines up all the threads AFTER the critical point.
+ *
+ * This function is used to avoid any one thread running ahead of
the reset if
+ * the barrier is used in a loop 

Re: [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU

2019-12-12 Thread Eric Huang

It fixes a CPU stuck issue in some extreme test cases.

Reviewed-by: Eric Huang 

On 2019-12-12 9:51 a.m., Philip Yang wrote:

Because queue_work schedules the work on the same CPU the interrupt
handler is running on, if there are many interrupts pending, it takes
longer for the work queue to start, or even worse, the system will hang.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..ee2a9bb1cb07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -844,7 +844,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
   patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(kfd,
 is_patched ? patched_ihre : ih_ring_entry))
-   queue_work(kfd->ih_wq, &kfd->interrupt_work);
+   queue_work_on((smp_processor_id() + 1) % num_online_cpus(),
+  kfd->ih_wq, &kfd->interrupt_work);
  
  	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);

  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RESEND PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Andrey Grodzovsky


On 12/11/19 11:04 PM, Ma, Le wrote:


[AMD Official Use Only - Internal Distribution Only]

-Original Message-
From: Andrey Grodzovsky 
Sent: Thursday, December 12, 2019 4:39 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Ma, Le 
; Zhang, Hawking ; Quan, Evan 
; Grodzovsky, Andrey 

Subject: [RESEND PATCH 2/5] drm: Add Reusable task barrier.

It is used to synchronize N threads at a rendezvous point before 
execution of critical code that has to be started by all the threads 
at approximately the same time.


Signed-off-by: Andrey Grodzovsky >


---

include/drm/task_barrier.h | 106 
+


1 file changed, 106 insertions(+)

create mode 100644 include/drm/task_barrier.h

diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h 
new file mode 100644 index 000..81fb0f7


--- /dev/null

+++ b/include/drm/task_barrier.h

@@ -0,0 +1,106 @@

+/*

+ * Copyright 2019 Advanced Micro Devices, Inc.

+ *

+ * Permission is hereby granted, free of charge, to any person

+obtaining a

+ * copy of this software and associated documentation files (the

+"Software"),

+ * to deal in the Software without restriction, including without

+limitation

+ * the rights to use, copy, modify, merge, publish, distribute,

+sublicense,

+ * and/or sell copies of the Software, and to permit persons to whom

+the

+ * Software is furnished to do so, subject to the following conditions:

+ *

+ * The above copyright notice and this permission notice shall be

+included in

+ * all copies or substantial portions of the Software.

+ *

+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

+EXPRESS OR

+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

+MERCHANTABILITY,

+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT

+SHALL

+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,

+DAMAGES OR

+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR

+OTHERWISE,

+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE

+OR

+ * OTHER DEALINGS IN THE SOFTWARE.

+ *

+ */

+#include <linux/semaphore.h>

+#include <linux/atomic.h>

+

+/*

+ * Reusable 2 PHASE task barrier (rendezvous point) implementation for 
N tasks.


+ * Based on the Little Book of Semaphores -

+https://greenteapress.com/wp/semaphores/

+ */

+

+

+

+#ifndef DRM_TASK_BARRIER_H_

+#define DRM_TASK_BARRIER_H_

+

[Le]: It might be better to prefix “drm_” to the functions and 
structure below, even this header file name.




I am not sure about this - see the example of spsc_queue we added for 
GPU scheduler use. I just followed it as an example of where to place 
the structure. There is nothing DRM specific about spsc_queue or 
task_barrier, they are generic constructs that we place in DRM subsystem 
for common use.




+/*

+ * Represents an instance of a task barrier.

+ */

+struct task_barrier {

+  unsigned int n;

[Le]: We can define it as signed type here for more common use.



This is a counter of the number of tasks/threads to synchronize in the 
barrier; it cannot go below 0.


Andrey



+  atomic_t count;

+  struct semaphore enter_turnstile;

+  struct semaphore exit_turnstile;

+};

+

+static inline void task_barrier_signal_turnstile(struct semaphore 
*turnstile,


+ unsigned int n)

+{

+  int i;

+

+  for (i = 0 ; i < n; i++)

+  up(turnstile);

+}

+

+static inline void task_barrier_init(struct task_barrier *tb) {

+  tb->n = 0;

+  atomic_set(&tb->count, 0);

+ sema_init(&tb->enter_turnstile, 0);

+ sema_init(&tb->exit_turnstile, 0);

+}

+

+static inline void task_barrier_add_task(struct task_barrier *tb) {

+  tb->n++;

+}

+

+static inline void task_barrier_rem_task(struct task_barrier *tb) {

+  tb->n--;

+}

+

+/*

+ * Lines up all the threads BEFORE the critical point.

+ *

+ * When all thread passed this code the entry barrier is back to 
locked state.


+ */

+static inline void task_barrier_enter(struct task_barrier *tb) {

+  if (atomic_inc_return(&tb->count) == tb->n)

+ task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);

+

+ down(&tb->enter_turnstile);

+}

+

+/*

+ * Lines up all the threads AFTER the critical point.

+ *

+ * This function is used to avoid any one thread running ahead of the

+reset if

[Le]: No need to mention “reset” here.

With the above addressed, Acked-by: Le Ma le...@amd.com 



Regards,

Ma Le

+ * the barrier is used in a loop (repeatedly) .

+ */

+static inline void task_barrier_exit(struct task_barrier *tb) {

+  if (atomic_dec_return(&tb->count) == 0)

+ task_barrier_signal_turnstile(&tb->exit_turnstile, tb->n);

+

+ down(&tb->exit_turnstile);

+}

+

+static inline void task_barrier_full(struct task_barrier *tb) {

+  task_barrier_enter(tb);

+  task_barrier_exit(tb);

+}

+

+#endif

--


[PATCH] drm/amdkfd: queue kfd interrupt work to different CPU

2019-12-12 Thread Philip Yang
Because queue_work schedules the work on the same CPU the interrupt
handler is running on, if there are many interrupts pending, it takes
longer for the work queue to start, or even worse, the system will hang.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..ee2a9bb1cb07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -844,7 +844,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void 
*ih_ring_entry)
   patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(kfd,
 is_patched ? patched_ihre : ih_ring_entry))
-   queue_work(kfd->ih_wq, &kfd->interrupt_work);
+   queue_work_on((smp_processor_id() + 1) % num_online_cpus(),
+  kfd->ih_wq, &kfd->interrupt_work);
 
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Grodzovsky, Andrey
[AMD Official Use Only - Internal Distribution Only]

__
From: Christian König 
Sent: 12 December 2019 03:31
To: Alex Deucher; Grodzovsky, Andrey
Cc: Deucher, Alexander; Ma, Le; Quan, Evan; amd-gfx list; Zhang, Hawking
Subject: Re: [PATCH 2/5] drm: Add Reusable task barrier.

Am 12.12.19 um 09:24 schrieb Christian König:
> Am 11.12.19 um 21:19 schrieb Alex Deucher:
>> On Wed, Dec 11, 2019 at 3:07 PM Andrey Grodzovsky
>>  wrote:
>>> It is used to synchronize N threads at a rendezvous point before
>>> execution
>>> of critical code that has to be started by all the threads at
>>> approximately
>>> the same time.
>>>
>>> Signed-off-by: Andrey Grodzovsky 
>> You should resend to dri-devel since this task barrier is being added
>> to common code.
>
> In addition to that, this whole thing has the potential to raise lockdep
> warnings and, if I'm not completely mistaken, doesn't even work correctly.

Can you give me a potential lockdep scenario ?

Andrey

>
> See Linux kernel semaphores don't allow negative values (the count
> field in struct semaphore is unsigned).

Ok, forget what I wrote. That indeed seems to be supported, some
other drivers are already using semaphores the same way.

Regards,
Christian.

>
> Regards,
> Christian.
>
>>
>> Alex
>>
>>> ---
>>>   include/drm/task_barrier.h | 106
>>> +
>>>   1 file changed, 106 insertions(+)
>>>   create mode 100644 include/drm/task_barrier.h
>>>
>>> diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h
>>> new file mode 100644
>>> index 000..81fb0f7
>>> --- /dev/null
>>> +++ b/include/drm/task_barrier.h
>>> @@ -0,0 +1,106 @@
>>> +/*
>>> + * Copyright 2019 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to
>>> whom the
>>> + * Software is furnished to do so, subject to the following
>>> conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +#include <linux/semaphore.h>
>>> +#include <linux/atomic.h>
>>> +
>>> +/*
>>> + * Reusable 2 PHASE task barrier (rendezvous point) implementation
>>> for N tasks.
>>> + * Based on the Little Book of Semaphores -
>>> https://greenteapress.com/wp/semaphores/
>>> + */
>>> +
>>> +
>>> +
>>> +#ifndef DRM_TASK_BARRIER_H_
>>> +#define DRM_TASK_BARRIER_H_
>>> +
>>> +/*
>>> + * Represents an instance of a task barrier.
>>> + */
>>> +struct task_barrier {
>>> +   unsigned int n;
>>> +   atomic_t count;
>>> +   struct semaphore enter_turnstile;
>>> +   struct semaphore exit_turnstile;
>>> +};
>>> +
>>> +static inline void task_barrier_signal_turnstile(struct semaphore
>>> *turnstile,
>>> +unsigned int n)
>>> +{
>>> +   int i;
>>> +
>>> +   for (i = 0 ; i < n; i++)
>>> +   up(turnstile);
>>> +}
>>> +
>>> +static inline void task_barrier_init(struct task_barrier *tb)
>>> +{
>>> +   tb->n = 0;
>>> +   atomic_set(&tb->count, 0);
>>> +   sema_init(&tb->enter_turnstile, 0);
>>> +   sema_init(&tb->exit_turnstile, 0);
>>> +}
>>> +
>>> +static inline void task_barrier_add_task(struct task_barrier *tb)
>>> +{
>>> +   tb->n++;
>>> +}
>>> +
>>> +static inline void task_barrier_rem_task(struct task_barrier *tb)
>>> +{
>>> +   tb->n--;
>>> +}
>>> +
>>> +/*
>>> + * Lines up all the threads BEFORE the critical point.
>>> + *
>>> + * When all thread passed this code the entry barrier is back to
>>> locked state.
>>> + */
>>> +static inline void task_barrier_enter(struct task_barrier *tb)
>>> +{
>>> +   if (atomic_inc_return(&tb->count) == tb->n)
>>> + task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
>>> +
>>> +   down(&tb->enter_turnstile);
>>> +}
>>> +
>>> +/*
>>> + 

Re: [PATCH 5/5] drm/amdgpu: immedially invalidate PTEs

2019-12-12 Thread Philip Yang

Hi Christian,

FYI, after removing amdgpu_bo_reserve(root, true) before calling 
amdgpu_vm_bo_update_mapping, I got this warning backtrace:


[  182.390072] WARNING: CPU: 12 PID: 4376 at

/home/yangp/git/compute_staging/kernel/drivers/gpu/drm/ttm/ttm_bo.c:1229
ttm_bo_validate+0x14d/0x1b0 [ttm]
[  182.390085] Modules linked in: fuse ip6table_filter ip6_tables
iptable_filter amdgpu amd_iommu_v2 gpu_sched ast drm_vram_helper
drm_ttm_helper ttm k10temp ip_tables x_tables i2c_piix4
[  182.390123] CPU: 12 PID: 4376 Comm: kfdtest Tainted: GW
5.4.0-rc7-kfd-yangp #1
[  182.390133] Hardware name: GIGABYTE MZ01-CE0-00/MZ01-CE0-00, 
BIOS F12

08/05/2019
[  182.390146] RIP: 0010:ttm_bo_validate+0x14d/0x1b0 [ttm]
[  182.390153] Code: 40 ff 52 18 8b 44 24 04 e9 4f ff ff ff 48 8b 87 20
01 00 00 be ff ff ff ff 48 8d 78 60 e8 5b 3a e1 d4 85 c0 0f 85 e7 fe ff
ff <0f> 0b e9 e0 fe ff ff be 01 00 00 00 48 89 df e8 2f c4 ff ff e9 19
[  182.390161] RSP: 0018:ab7a032f3990 EFLAGS: 00010246
[  182.390166] RAX:  RBX: 943c59b37850 RCX:
943c59b35000
[  182.390171] RDX: 943c539daf00 RSI: 943c56fb31d8 RDI:
943c539db790
[  182.390178] RBP: 943c59b37830 R08: 0200 R09:

[  182.390184] R10:  R11: 001fee0e R12:
ab7a032f3a50
[  182.390194] R13: 0200 R14: 943c59b37800 R15:

[  182.390197] FS:  7f0d27f41780() GS:943c9e90()
knlGS:
[  182.390203] CS:  0010 DS:  ES:  CR0: 80050033
[  182.390209] CR2: 7fba7a1010a0 CR3: 0007f2624000 CR4:
003406e0
[  182.390212] Call Trace:
[  182.390219]  ? rcu_read_lock_sched_held+0x52/0x80
[  182.390223]  ? _raw_spin_unlock+0x24/0x30
[  182.390267]  ? amdgpu_bo_do_create+0x4d1/0x5d0 [amdgpu]
[  182.390319]  amdgpu_vm_clear_bo+0x13d/0x3a0 [amdgpu]
[  182.390371]  ? amdgpu_vm_num_entries+0x1e/0x70 [amdgpu]
[  182.390424]  amdgpu_vm_update_ptes+0x561/0x5d0 [amdgpu]
[  182.390480]  amdgpu_vm_bo_update_mapping+0xfd/0x130 [amdgpu]
[  182.390530]  amdgpu_vm_bo_split_mapping+0x1ea/0x2c0 [amdgpu]
[  182.390591]  svm_range_map_to_gpus+0x160/0x310 [amdgpu]
[  182.390650]  kfd_register_svm+0xb8/0x2b0 [amdgpu]
[  182.390708]  kfd_ioctl_register_svm+0xe8/0x110 [amdgpu]
[  182.390765]  kfd_ioctl+0x232/0x3d0 [amdgpu]
[  182.390823]  ? kfd_ioctl_get_process_apertures_new+0x310/0x310
[amdgpu]
[  182.390838]  ? selinux_file_ioctl+0x153/0x210
[  182.390845]  do_vfs_ioctl+0xa2/0x6e0
[  182.390854]  ksys_ioctl+0x70/0x80
[  182.390862]  __x64_sys_ioctl+0x16/0x20
[  182.390869]  do_syscall_64+0x4a/0x1b0
[  182.390879]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

Philip

On 2019-12-12 3:51 a.m., Christian König wrote:

Hi Felix,

yeah, I've also found a corner case which would raise a warning now.

Need to rework how dependencies for the PTE update are generated.

Going to take care of this in the next few days,
Christian.

Am 12.12.19 um 01:20 schrieb Felix Kuehling:

Hi Christian,

Alex started trying to invalidate PTEs in the MMU notifiers and we're 
finding that we still need to reserve the VM reservation for 
amdgpu_sync_resv in amdgpu_vm_sdma_prepare. Is that sync_resv still 
needed now, given that VM fences aren't in that reservation object any 
more?


Regards,
  Felix

On 2019-12-05 5:39, Christian König wrote:

When a BO is evicted, immediately invalidate the mapped PTEs.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 -
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 839d6df394fc..e578113bfd55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2565,6 +2565,7 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

   struct amdgpu_bo *bo, bool evicted)
  {
  struct amdgpu_vm_bo_base *bo_base;
+    int r;
    /* shadow bo doesn't have bo base, its validation needs its 
parent */

  if (bo->parent && bo->parent->shadow == bo)
@@ -2572,8 +2573,22 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

    for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
  struct amdgpu_vm *vm = bo_base->vm;
+    struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+    if (bo->tbo.type != ttm_bo_type_kernel) {
+    struct amdgpu_bo_va *bo_va;
+
+    bo_va = container_of(bo_base, struct amdgpu_bo_va,
+ base);
+    r = amdgpu_vm_bo_update(adev, bo_va,
+    bo->tbo.base.resv != resv);
+    if (!r) {
+    amdgpu_vm_bo_idle(bo_base);
+    continue;
+    }
+    }
  -    if 

Re: [pull] amdgpu, amdkfd, radeon drm-next-5.6

2019-12-12 Thread Mike Lothian
Hi

Please can amdgpu/raven_ta.bin be published somewhere

Thanks

Mike

On Wed, 11 Dec 2019 at 22:30, Alex Deucher  wrote:
>
> Hi Dave, Daniel,
>
> Kicking off 5.6 with new stuff from AMD.  There is a UAPI addition.  We
> added a new firmware for display, and this just adds the version query
> to our existing firmware query interface.  UMDs like mesa use this interface 
> to
> query things like CP or UVD firmware versions to see what features are
> supported.
>
> The following changes since commit 622b2a0ab647d2755f2c1f1000d3403e86a69763:
>
>   drm/amdgpu/vcn: finish delay work before release resources (2019-11-13 
> 15:29:42 -0500)
>
> are available in the Git repository at:
>
>   git://people.freedesktop.org/~agd5f/linux tags/drm-next-5.6-2019-12-11
>
> for you to fetch changes up to ad808910be68dcf8da5d837d4511d00ad5d3678a:
>
>   drm/amdgpu: fix license on Kconfig and Makefiles (2019-12-11 15:22:08 -0500)
>
> 
> drm-next-5.6-2019-12-11:
>
> amdgpu:
> - Add MST atomic routines
> - Add support for DMCUB (new helper microengine for displays)
> - Add OEM i2c support in DC
> - Use vstartup for vblank events on DCN
> - Simplify Kconfig for DC
> - Renoir fixes for DC
> - Clean up function pointers in DC
> - Initial support for HDCP 2.x
> - Misc code cleanups
> - GFX10 fixes
> - Rework JPEG engine handling for VCN
> - Add clock and power gating support for JPEG
> - BACO support for Arcturus
> - Cleanup PSP ring handling
> - Add framework for using BACO with runtime pm to save power
> - Move core pci state handling out of the driver for pm ops
> - Allow guest power control in 1 VF case with SR-IOV
> - SR-IOV fixes
> - RAS fixes
> - Support for power metrics on renoir
> - Golden settings updates for gfx10
> - Enable gfxoff on supported navi10 skus
> - Update MAINTAINERS
>
> amdkfd:
> - Clean up generational gfx code
> - Fixes for gfx10
> - DIQ fixes
> - Share more code with amdgpu
>
> radeon:
> - PPC DMA fix
> - Register checker fixes for r1xx/r2xx
> - Misc cleanups
>
> 
> Alex Deucher (34):
>   drm/amdgpu/display: fix the build when CONFIG_DRM_AMD_DC_DCN is not set
>   drm/amdgpu/display: fix warning when CONFIG_DRM_AMD_DC_DCN is not set
>   drm/amdgpu/soc15: move struct definition around to align with other 
> soc15 asics
>   drm/amdgpu/nv: add asic func for fetching vbios from rom directly
>   drm/amdgpu/powerplay: properly set PP_GFXOFF_MASK (v2)
>   drm/amdgpu: disable gfxoff when using register read interface
>   drm/amdgpu: remove experimental flag for Navi14
>   drm/amdgpu: disable gfxoff on original raven
>   Revert "drm/amd/display: enable S/G for RAVEN chip"
>   drm/amdgpu: add asic callback for BACO support
>   drm/amdgpu: add supports_baco callback for soc15 asics. (v2)
>   drm/amdgpu: add supports_baco callback for SI asics.
>   drm/amdgpu: add supports_baco callback for CIK asics.
>   drm/amdgpu: add supports_baco callback for VI asics.
>   drm/amdgpu: add supports_baco callback for NV asics.
>   drm/amdgpu: add a amdgpu_device_supports_baco helper
>   drm/amdgpu: rename amdgpu_device_is_px to amdgpu_device_supports_boco 
> (v2)
>   drm/amdgpu: add additional boco checks to runtime suspend/resume (v2)
>   drm/amdgpu: split swSMU baco_reset into enter and exit
>   drm/amdgpu: add helpers for baco entry and exit
>   drm/amdgpu: add baco support to runtime suspend/resume
>   drm/amdgpu: start to disentangle boco from runtime pm
>   drm/amdgpu: disentangle runtime pm and vga_switcheroo
>   drm/amdgpu: enable runtime pm on BACO capable boards if runpm=1
>   drm/amdgpu: simplify runtime suspend
>   drm/amd/display: add default clocks if not able to fetch them
>   MAINTAINERS: Drop Rex Zhu for amdgpu powerplay
>   drm/amdgpu: move pci handling out of pm ops
>   drm/amdgpu: flag vram lost on baco reset for VI/CIK
>   drm/amd/display: re-enable wait in pipelock, but add timeout
>   drm/radeon: fix r1xx/r2xx register checker for POT textures
>   drm/amdgpu: add header line for power profile on Arcturus
>   drm/amdgpu/display: add fallthrough comment
>   drm/amdgpu: fix license on Kconfig and Makefiles
>
> Alex Sierra (2):
>   drm/amdgpu: add flag to indicate amdgpu vm context
>   amd/amdgpu: force to trigger a no-retry-fault after a retry-fault
>
> Alvin Lee (1):
>   drm/amd/display: Changes in dc to allow full update in some cases
>
> Amanda Liu (1):
>   drm/amd/display: Fix screen tearing on vrr tests
>
> Andrey Grodzovsky (1):
>   drm/amdgpu: Fix BACO entry failure in NAVI10.
>
> Anthony Koo (8):
>   drm/amd/display: set MSA MISC1 bit 6 while sending colorimetry in VSC 
> SDP
>   drm/amd/display: Clean up some code with unused registers
>   drm/amd/display: cleanup of construct and destruct funcs
>   

Re: [PATCH] drm/amdgpu: enable gfxoff for raven1 refresh

2019-12-12 Thread Huang, Ray
[AMD Official Use Only - Internal Distribution Only]

On Thu, Dec 12, 2019 at 06:01:55PM +0800, Zhu, Changfeng wrote:
> From: changzhu 
> 
> When smu version is larger than 0x41e2b, it will load
> raven_kicker_rlc.bin.To enable gfxoff for raven_kicker_rlc.bin,it
> needs to avoid adev->pm.pp_feature &= ~PP_GFXOFF_MASK when it loads
> raven_kicker_rlc.bin.
> 
> Change-Id: I4dffa1783c9ceb5d40df9756d821e2cd7feff84d
> Signed-off-by: changzhu 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 -
>  1 file changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ea58d0e5be4c..56a38d67a949 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1038,17 +1038,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
> amdgpu_device *adev)
>   case CHIP_VEGA20:
>   break;
>   case CHIP_RAVEN:
> - /* Disable GFXOFF on original raven.  There are combinations
> -  * of sbios and platforms that are not stable.
> -  */

Please add comments that only enable gfxoff on raven kicker so far.


> - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
> - adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
> - else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
> -  &&((adev->gfx.rlc_fw_version != 106 &&
> -  adev->gfx.rlc_fw_version < 531) ||
> + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
> +  &&((adev->gfx.rlc_fw_version < 531) ||
>   (adev->gfx.rlc_fw_version == 53815) ||
>   (adev->gfx.rlc_feature_version < 1) ||
> + !adev->gfx.rlc.is_rlc_v2_1)
> +  &&(adev->pm.fw_version < 0x41e2b))

I think the if should be below:

if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
&& adev->pm.fw_version < 0x41e2b /* not raven1 fresh */
|| !adev->gfx.rlc.is_rlc_v2_1) /* without rlc save 
restore ucodes */

Thanks,
Ray

>   adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>  
>   if (adev->pm.pp_feature & PP_GFXOFF_MASK)
> -- 
> 2.17.1
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: enable gfxoff for raven1 refresh

2019-12-12 Thread Changfeng.Zhu
From: changzhu 

When smu version is larger than 0x41e2b, it will load
raven_kicker_rlc.bin.To enable gfxoff for raven_kicker_rlc.bin,it
needs to avoid adev->pm.pp_feature &= ~PP_GFXOFF_MASK when it loads
raven_kicker_rlc.bin.

Change-Id: I4dffa1783c9ceb5d40df9756d821e2cd7feff84d
Signed-off-by: changzhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ea58d0e5be4c..56a38d67a949 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1038,17 +1038,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
-   /* Disable GFXOFF on original raven.  There are combinations
-* of sbios and platforms that are not stable.
-*/
-   if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
-   adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-   else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
-&&((adev->gfx.rlc_fw_version != 106 &&
-adev->gfx.rlc_fw_version < 531) ||
+   if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+&&((adev->gfx.rlc_fw_version < 531) ||
(adev->gfx.rlc_fw_version == 53815) ||
(adev->gfx.rlc_feature_version < 1) ||
-   !adev->gfx.rlc.is_rlc_v2_1))
+   !adev->gfx.rlc.is_rlc_v2_1)
+&&(adev->pm.fw_version < 0x41e2b))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [GIT PULL] Please pull hmm changes

2019-12-12 Thread Jason Gunthorpe
On Thu, Dec 05, 2019 at 11:03:24AM -0500, Jerome Glisse wrote:

> > struct mmu_notifier_mm (ie the mm->mmu_notifier_mm)
> >-> mmn_mm
> > struct mm_struct 
> >-> mm
> > struct mmu_notifier (ie the user subscription to the mm_struct)
> >-> mn
> > struct mmu_interval_notifier (the other kind of user subscription)
> >-> mni
> 
> What about "interval" the context should already tell people
> it is related to mmu notifier and thus a notifier. I would
> just remove the notifier suffix, this would match the below
> range.

Interval could be a good replacement for mni in the mm/mmu_notifier
file if we don't do the wholesale rename.

> > I think it would be overall nicer with better names for the original
> > structs. Perhaps:
> > 
> >  mmn_* - MMU notifier prefix
> >  mmn_state <- struct mmu_notifier_mm
> >  mmn_subscription (mmn_sub) <- struct mmu_notifier
> >  mmn_range_subscription (mmn_range_sub) <- struct mmu_interval_notifier
> >  mmn_invalidate_desc <- struct mmu_notifier_range
> 
> This looks good.

Well, let's just bite the bullet then and switch it. Do you like
'state'? I thought that was the weakest one.

We could use mmnotif as the prefix, this makes the longest:

  struct mmnotif_range_subscription

Which is reasonable enough

> Maybe we can do a semantic patch to do convertion and then Linus
> can easily apply the patch by just re-running the coccinelle.

I tried this last time I renamed everything, it was OK, but it missed
updating the comments. So it still needs some by-hand helping.

I'll make some patches next week when I get back.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [GIT PULL] Please pull hmm changes

2019-12-12 Thread Jason Gunthorpe
On Thu, Dec 05, 2019 at 03:03:56PM -0800, John Hubbard wrote:

> No advice, just a naming idea similar in spirit to Jerome's suggestion
> (use a longer descriptive word, and don't try to capture the entire phrase):
> use "notif" in place of the unloved "mmn". So partially, approximately like 
> this:
> 
> notif_*<- MMU notifier prefix
> notif_state<- struct mmu_notifier_mm
> notif_subscription (notif_sub) <- struct mmu_notifier
> notif_invalidate_desc  <- struct mmu_notifier_range*
> notif_range_subscription (notif_range_sub) <- struct mmu_interval_notifier

To me 'notif' suggests this belongs to the stuff in notifier.h - i.e.
the naked word notifier is already taken.

Jason
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 5/5] drm/amdgpu: immedially invalidate PTEs

2019-12-12 Thread Christian König

Hi Felix,

yeah, I've also found a corner case which would raise a warning now.

Need to rework how dependencies for the PTE update are generated.

Going to take care of this in the next few days,
Christian.

Am 12.12.19 um 01:20 schrieb Felix Kuehling:

Hi Christian,

Alex started trying to invalidate PTEs in the MMU notifiers and we're 
finding that we still need to reserve the VM reservation for 
amdgpu_sync_resv in amdgpu_vm_sdma_prepare. Is that sync_resv still 
needed now, given that VM fences aren't in that reservation object any 
more?


Regards,
  Felix

On 2019-12-05 5:39, Christian König wrote:

When a BO is evicted, immediately invalidate the mapped PTEs.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 -
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 839d6df394fc..e578113bfd55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2565,6 +2565,7 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

   struct amdgpu_bo *bo, bool evicted)
  {
  struct amdgpu_vm_bo_base *bo_base;
+    int r;
    /* shadow bo doesn't have bo base, its validation needs its 
parent */

  if (bo->parent && bo->parent->shadow == bo)
@@ -2572,8 +2573,22 @@ void amdgpu_vm_bo_invalidate(struct 
amdgpu_device *adev,

    for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
  struct amdgpu_vm *vm = bo_base->vm;
+    struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+    if (bo->tbo.type != ttm_bo_type_kernel) {
+    struct amdgpu_bo_va *bo_va;
+
+    bo_va = container_of(bo_base, struct amdgpu_bo_va,
+ base);
+    r = amdgpu_vm_bo_update(adev, bo_va,
+    bo->tbo.base.resv != resv);
+    if (!r) {
+    amdgpu_vm_bo_idle(bo_base);
+    continue;
+    }
+    }
  -    if (evicted && bo->tbo.base.resv == 
vm->root.base.bo->tbo.base.resv) {

+    if (evicted && bo->tbo.base.resv == resv) {
  amdgpu_vm_bo_evicted(bo_base);
  continue;
  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Christian König

Am 12.12.19 um 09:24 schrieb Christian König:

Am 11.12.19 um 21:19 schrieb Alex Deucher:

On Wed, Dec 11, 2019 at 3:07 PM Andrey Grodzovsky
 wrote:
It is used to synchronize N threads at a rendezvous point before execution
of critical code that has to be started by all the threads at approximately
the same time.

Signed-off-by: Andrey Grodzovsky 

You should resend to dri-devel since this task barrier is being added
to common code.


In addition to that, this whole thing has the potential to raise lockdep
warnings and, if I'm not completely mistaken, doesn't even work correctly.


See Linux kernel semaphores don't allow negative values (the count 
field in struct semaphore is unsigned).


OK, forget what I wrote. That indeed seems to be supported; some
other drivers are already using semaphores the same way.


Regards,
Christian.



Regards,
Christian.



Alex


---
  include/drm/task_barrier.h | 106 
+

  1 file changed, 106 insertions(+)
  create mode 100644 include/drm/task_barrier.h

diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h
new file mode 100644
index 000..81fb0f7
--- /dev/null
+++ b/include/drm/task_barrier.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+/*
+ * Reusable 2 PHASE task barrier (randevouz point) implementation 
for N tasks.
+ * Based on the Little book of sempahores - 
https://greenteapress.com/wp/semaphores/

+ */
+
+
+
+#ifndef DRM_TASK_BARRIER_H_
+#define DRM_TASK_BARRIER_H_
+
+/*
+ * Represents an instance of a task barrier.
+ */
+struct task_barrier {
+   unsigned int n;
+   atomic_t count;
+   struct semaphore enter_turnstile;
+   struct semaphore exit_turnstile;
+};
+
+static inline void task_barrier_signal_turnstile(struct semaphore 
*turnstile,

+    unsigned int n)
+{
+   int i;
+
+   for (i = 0 ; i < n; i++)
+   up(turnstile);
+}
+
+static inline void task_barrier_init(struct task_barrier *tb)
+{
+   tb->n = 0;
+   atomic_set(&tb->count, 0);
+   sema_init(&tb->enter_turnstile, 0);
+   sema_init(&tb->exit_turnstile, 0);
+}
+
+static inline void task_barrier_add_task(struct task_barrier *tb)
+{
+   tb->n++;
+}
+
+static inline void task_barrier_rem_task(struct task_barrier *tb)
+{
+   tb->n--;
+}
+
+/*
+ * Lines up all the threads BEFORE the critical point.
+ *
+ * When all thread passed this code the entry barrier is back to 
locked state.

+ */
+static inline void task_barrier_enter(struct task_barrier *tb)
+{
+   if (atomic_inc_return(&tb->count) == tb->n)
+ task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
+
+   down(&tb->enter_turnstile);
+}
+
+/*
+ * Lines up all the threads AFTER the critical point.
+ *
+ * This function is used to avoid any one thread running ahead of 
the reset if

+ * the barrier is used in a loop (repeatedly) .
+ */
+static inline void task_barrier_exit(struct task_barrier *tb)
+{
+   if (atomic_dec_return(&tb->count) == 0)
+ task_barrier_signal_turnstile(&tb->exit_turnstile, tb->n);
+
+   down(&tb->exit_turnstile);
+}
+
+static inline void task_barrier_full(struct task_barrier *tb)
+{
+   task_barrier_enter(tb);
+   task_barrier_exit(tb);
+}
+
+#endif
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/5] drm: Add Reusable task barrier.

2019-12-12 Thread Christian König

Am 11.12.19 um 21:19 schrieb Alex Deucher:

On Wed, Dec 11, 2019 at 3:07 PM Andrey Grodzovsky
 wrote:

It is used to synchronize N threads at a rendezvous point before execution
of critical code that has to be started by all the threads at approximately
the same time.

Signed-off-by: Andrey Grodzovsky 

You should resend to dri-devel since this task barrier is being added
to common code.


Additional to that this whole thing has the potential to raise lockdep 
warnings and if I'm not completely mistaken doesn't even work correctly.


See Linux kernel semaphores don't allow negative values (the count field 
in struct semaphore is unsigned).


Regards,
Christian.



Alex


---
  include/drm/task_barrier.h | 106 +
  1 file changed, 106 insertions(+)
  create mode 100644 include/drm/task_barrier.h

diff --git a/include/drm/task_barrier.h b/include/drm/task_barrier.h
new file mode 100644
index 000..81fb0f7
--- /dev/null
+++ b/include/drm/task_barrier.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+/*
+ * Reusable 2 PHASE task barrier (rendezvous point) implementation for N tasks.
+ * Based on The Little Book of Semaphores - https://greenteapress.com/wp/semaphores/
+ */
+
+
+
+#ifndef DRM_TASK_BARRIER_H_
+#define DRM_TASK_BARRIER_H_
+
+/*
+ * Represents an instance of a task barrier.
+ */
+struct task_barrier {
+   unsigned int n;
+   atomic_t count;
+   struct semaphore enter_turnstile;
+   struct semaphore exit_turnstile;
+};
+
+static inline void task_barrier_signal_turnstile(struct semaphore *turnstile,
+unsigned int n)
+{
+   int i;
+
+   for (i = 0 ; i < n; i++)
+   up(turnstile);
+}
+
+static inline void task_barrier_init(struct task_barrier *tb)
+{
+   tb->n = 0;
+   atomic_set(&tb->count, 0);
+   sema_init(&tb->enter_turnstile, 0);
+   sema_init(&tb->exit_turnstile, 0);
+}
+
+static inline void task_barrier_add_task(struct task_barrier *tb)
+{
+   tb->n++;
+}
+
+static inline void task_barrier_rem_task(struct task_barrier *tb)
+{
+   tb->n--;
+}
+
+/*
+ * Lines up all the threads BEFORE the critical point.
+ *
+ * When all thread passed this code the entry barrier is back to locked state.
+ */
+static inline void task_barrier_enter(struct task_barrier *tb)
+{
+   if (atomic_inc_return(&tb->count) == tb->n)
+   task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
+
+   down(&tb->enter_turnstile);
+}
+
+/*
+ * Lines up all the threads AFTER the critical point.
+ *
+ * This function is used to avoid any one thread running ahead of the reset if
+ * the barrier is used in a loop (repeatedly) .
+ */
+static inline void task_barrier_exit(struct task_barrier *tb)
+{
+   if (atomic_dec_return(&tb->count) == 0)
+   task_barrier_signal_turnstile(&tb->exit_turnstile, tb->n);
+
+   down(&tb->exit_turnstile);
+}
+
+static inline void task_barrier_full(struct task_barrier *tb)
+{
+   task_barrier_enter(tb);
+   task_barrier_exit(tb);
+}
+
+#endif
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: add JPEG check to VCN idle handler and begin use

2019-12-12 Thread Christian König

Am 11.12.19 um 20:48 schrieb Leo Liu:

Since it's only needed with VCN 1.0, where the HW has no JPEG HW IP
block of its own.


Wouldn't it be simpler/cleaner to just define a 
vcn_v1_0_ring_begin_use() and vcn_v1_0_idle_work_handler() instead?


Regards,
Christian.



Signed-off-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 29 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 428cfd58b37d..95ac721f2de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -186,6 +186,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
}
}
  
+	adev->vcn.has_jpeg_block = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?

+   true : false;
+
return 0;
  }
  
@@ -306,15 +309,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)

else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  
-			if (amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec))

-   new_state.jpeg = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
+   if (!adev->vcn.has_jpeg_block) {
+   if 
(amdgpu_fence_count_emitted(&adev->jpeg.inst[j].ring_dec))
+   new_state.jpeg = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+   }
 adev->vcn.pause_dpg_mode(adev, &new_state);
}
  
-		fence[j] += amdgpu_fence_count_emitted(>jpeg.inst[j].ring_dec);

+   if (!adev->vcn.has_jpeg_block)
+   fence[j] += 
amdgpu_fence_count_emitted(&adev->jpeg.inst[j].ring_dec);
fence[j] += 
amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
fences += fence[j];
}
@@ -358,14 +363,16 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
  
-		if (amdgpu_fence_count_emitted(>jpeg.inst[ring->me].ring_dec))

-   new_state.jpeg = VCN_DPG_STATE__PAUSE;
-   else
-   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+   if (!adev->vcn.has_jpeg_block) {
+   if 
(amdgpu_fence_count_emitted(&adev->jpeg.inst[ring->me].ring_dec))
+   new_state.jpeg = VCN_DPG_STATE__PAUSE;
+   else
+   new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+   }
  
  		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)

new_state.fw_based = VCN_DPG_STATE__PAUSE;
-   else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+   else if (!adev->vcn.has_jpeg_block && ring->funcs->type == 
AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
  
  		adev->vcn.pause_dpg_mode(adev, _state);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 402a5046b985..9a2381d006c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -192,6 +192,8 @@ struct amdgpu_vcn {
unsignedharvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
struct dpg_pause_state *new_state);
+
+   bool has_jpeg_block;
  };
  
  int amdgpu_vcn_sw_init(struct amdgpu_device *adev);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx