date:20230607

[PATCH] drm/amdkfd: to fix cwsr hang issue

2023-06-07 Thread Ji, Ruili

From: Ruili Ji 

Starting from GC_11_0_4, flag trap_en should be enabled for trap handler.

Signed-off-by: Ruili Ji 
Signed-off-by: Aaron Liu 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0c1be91a87c6..b695d7a3058c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -227,7 +227,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) 
||
- KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0) 
||
+ KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3) 
||
  q->properties.is_dbg_wa;
queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled;
 
@@ -1808,7 +1808,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
q->properties.is_evicted = !!qpd->evicted;
q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
-   KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0);
+   KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
 
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 
&q->gart_mqd_addr,
-- 
2.40.1

[PATCH] drm/amdgpu: unmap and remove csa_va properly

2023-06-07 Thread Lang Yu

Root PD BO should be reserved before unmap and remove
a bo_va from VM otherwise lockdep will complain.

v2: check fpriv->csa_va is not NULL instead of amdgpu_mcbp (christian)

[14616.936827] WARNING: CPU: 6 PID: 1711 at 
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1762 amdgpu_vm_bo_del+0x399/0x3f0 
[amdgpu]
[14616.937096] Call Trace:
[14616.937097]  
[14616.937102]  amdgpu_driver_postclose_kms+0x249/0x2f0 [amdgpu]
[14616.937187]  drm_file_free+0x1d6/0x300 [drm]
[14616.937207]  drm_close_helper.isra.0+0x62/0x70 [drm]
[14616.937220]  drm_release+0x5e/0x100 [drm]
[14616.937234]  __fput+0x9f/0x280
[14616.937239]  fput+0xe/0x20
[14616.937241]  task_work_run+0x61/0x90
[14616.937246]  exit_to_user_mode_prepare+0x215/0x220
[14616.937251]  syscall_exit_to_user_mode+0x2a/0x60
[14616.937254]  do_syscall_64+0x48/0x90
[14616.937257]  entry_SYSCALL_64_after_hwframe+0x63/0xcd

Signed-off-by: Lang Yu 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 38 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 +++
 3 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index c6d4d41c4393..23d054526e7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -106,3 +106,41 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
 }
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+   struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+   uint64_t csa_addr)
+{
+   struct ww_acquire_ctx ticket;
+   struct list_head list;
+   struct amdgpu_bo_list_entry pd;
+   struct ttm_validate_buffer csa_tv;
+   int r;
+
+   INIT_LIST_HEAD(&list);
+   INIT_LIST_HEAD(&csa_tv.head);
+   csa_tv.bo = &bo->tbo;
+   csa_tv.num_shared = 1;
+
+   list_add(&csa_tv.head, &list);
+   amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+   r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+   if (r) {
+   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+   return r;
+   }
+
+   r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+   if (r) {
+   DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+   ttm_eu_backoff_reservation(&ticket, &list);
+   return r;
+   }
+
+   amdgpu_vm_bo_del(adev, bo_va);
+
+   ttm_eu_backoff_reservation(&ticket, &list);
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
index 524b4437a021..7dfc1f2012eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, 
struct amdgpu_bo **bo
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
  uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+   struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+   uint64_t csa_addr);
 void amdgpu_free_static_csa(struct amdgpu_bo **bo);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 41d047e5de69..e3531aa3c8bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1311,12 +1311,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
 
-   if (amdgpu_mcbp) {
-   /* TODO: how to handle reserve failure */
-   BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
-   amdgpu_vm_bo_del(adev, fpriv->csa_va);
+   if (fpriv->csa_va) {
+   uint64_t csa_addr = amdgpu_csa_vaddr(adev) & 
AMDGPU_GMC_HOLE_MASK;
+
+   WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, 
adev->virt.csa_obj,
+   fpriv->csa_va, csa_addr));
fpriv->csa_va = NULL;
-   amdgpu_bo_unreserve(adev->virt.csa_obj);
}
 
pasid = fpriv->vm.pasid;
-- 
2.25.1

[PATCH 1/3] drm/amd/pm: fix vclk setting failed for SMU v13.0.4

2023-06-07 Thread Tim Huang

PMFW expects the argument to be left-shifted by 16 bits when setting
the VCLK DPM frequency for SMU v13.0.4.

Signed-off-by: Tim Huang 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h |  2 ++
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 11 +--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 5a99a091965e..6a0ac0bbaace 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -51,6 +51,8 @@
 #define CTF_OFFSET_HOTSPOT 5
 #define CTF_OFFSET_MEM 5
 
+#define SMU_13_VCLK_SHIFT  16
+
 extern const int pmfw_decoded_link_speed[5];
 extern const int pmfw_decoded_link_width[7];
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 46a8a366f287..999b07db862e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -831,6 +831,8 @@ static int smu_v13_0_4_set_soft_freq_limited_range(struct 
smu_context *smu,
   uint32_t max)
 {
enum smu_message_type msg_set_min, msg_set_max;
+   uint32_t min_clk = min;
+   uint32_t max_clk = max;
int ret = 0;
 
if (!smu_v13_0_4_clk_dpm_is_enabled(smu, clk_type))
@@ -859,12 +861,17 @@ static int smu_v13_0_4_set_soft_freq_limited_range(struct 
smu_context *smu,
return -EINVAL;
}
 
-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL);
+   if (clk_type == SMU_VCLK) {
+   min_clk = min << SMU_13_VCLK_SHIFT;
+   max_clk = max << SMU_13_VCLK_SHIFT;
+   }
+
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL);
if (ret)
return ret;
 
return smu_cmn_send_smc_msg_with_param(smu, msg_set_max,
-  max, NULL);
+  max_clk, NULL);
 }
 
 static int smu_v13_0_4_force_clk_levels(struct smu_context *smu,
-- 
2.34.1

[PATCH 3/3] drm/amd/pm: enable more Pstates profile levels for SMU v13.0.4

2023-06-07 Thread Tim Huang

This patch enables the following UMD stable Pstates profile
levels for the power_dpm_force_performance_level interface.

- profile_peak
- profile_min_mclk
- profile_min_sclk
- profile_standard

Signed-off-by: Tim Huang 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c  | 54 ++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 315a6d8bde2e..ef37dda9908f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -54,6 +54,10 @@
 
 #define FEATURE_MASK(feature) (1ULL << feature)
 
+#define SMU_13_0_4_UMD_PSTATE_GFXCLK   938
+#define SMU_13_0_4_UMD_PSTATE_SOCCLK   938
+#define SMU_13_0_4_UMD_PSTATE_FCLK 1875
+
 #define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
FEATURE_MASK(FEATURE_VCN_DPM_BIT)| \
@@ -908,6 +912,50 @@ static int smu_v13_0_4_force_clk_levels(struct smu_context 
*smu,
return ret;
 }
 
+static int smu_v13_0_4_get_dpm_profile_freq(struct smu_context *smu,
+   enum amd_dpm_forced_level level,
+   enum smu_clk_type clk_type,
+   uint32_t *min_clk,
+   uint32_t *max_clk)
+{
+   int ret = 0;
+   uint32_t clk_limit = 0;
+
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   clk_limit = SMU_13_0_4_UMD_PSTATE_GFXCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
&clk_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SCLK, 
&clk_limit, NULL);
+   break;
+   case SMU_SOCCLK:
+   clk_limit = SMU_13_0_4_UMD_PSTATE_SOCCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SOCCLK, 
NULL, &clk_limit);
+   break;
+   case SMU_FCLK:
+   clk_limit = SMU_13_0_4_UMD_PSTATE_FCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
&clk_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_FCLK, 
&clk_limit, NULL);
+   break;
+   case SMU_VCLK:
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, 
&clk_limit);
+   break;
+   case SMU_DCLK:
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, 
&clk_limit);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   *min_clk = *max_clk = clk_limit;
+   return ret;
+}
+
 static int smu_v13_0_4_set_performance_level(struct smu_context *smu,
 enum amd_dpm_forced_level level)
 {
@@ -955,7 +1003,11 @@ static int smu_v13_0_4_set_performance_level(struct 
smu_context *smu,
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
-   /* Temporarily do nothing since the optimal clocks haven't been 
provided yet */
+   smu_v13_0_4_get_dpm_profile_freq(smu, level, SMU_SCLK, 
&sclk_min, &sclk_max);
+   smu_v13_0_4_get_dpm_profile_freq(smu, level, SMU_FCLK, 
&fclk_min, &fclk_max);
+   smu_v13_0_4_get_dpm_profile_freq(smu, level, SMU_SOCCLK, 
&socclk_min, &socclk_max);
+   smu_v13_0_4_get_dpm_profile_freq(smu, level, SMU_VCLK, 
&vclk_min, &vclk_max);
+   smu_v13_0_4_get_dpm_profile_freq(smu, level, SMU_DCLK, 
&dclk_min, &dclk_max);
break;
case AMD_DPM_FORCED_LEVEL_MANUAL:
case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
-- 
2.34.1

[PATCH 2/3] drm/amd/pm: enable vclk and dclk Pstates for SMU v13.0.4

2023-06-07 Thread Tim Huang

Add the ability to control the vclk and dclk frequency by
power_dpm_force_performance_level interface.

Signed-off-by: Tim Huang 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c  | 29 +++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 999b07db862e..315a6d8bde2e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -915,6 +915,8 @@ static int smu_v13_0_4_set_performance_level(struct 
smu_context *smu,
uint32_t sclk_min = 0, sclk_max = 0;
uint32_t fclk_min = 0, fclk_max = 0;
uint32_t socclk_min = 0, socclk_max = 0;
+   uint32_t vclk_min = 0, vclk_max = 0;
+   uint32_t dclk_min = 0, dclk_max = 0;
int ret = 0;
 
switch (level) {
@@ -922,22 +924,32 @@ static int smu_v13_0_4_set_performance_level(struct 
smu_context *smu,
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
&sclk_max);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
&fclk_max);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, 
&socclk_max);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, 
&vclk_max);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, 
&dclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
+   vclk_min = vclk_max;
+   dclk_min = dclk_max;
break;
case AMD_DPM_FORCED_LEVEL_LOW:
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, 
NULL);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, 
NULL);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, 
NULL);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_VCLK, &vclk_min, 
NULL);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_DCLK, &dclk_min, 
NULL);
sclk_max = sclk_min;
fclk_max = fclk_min;
socclk_max = socclk_min;
+   vclk_max = vclk_min;
+   dclk_max = dclk_min;
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, 
&sclk_max);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, 
&fclk_max);
smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, 
&socclk_max);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_VCLK, &vclk_min, 
&vclk_max);
+   smu_v13_0_4_get_dpm_ultimate_freq(smu, SMU_DCLK, &dclk_min, 
&dclk_max);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -983,6 +995,23 @@ static int smu_v13_0_4_set_performance_level(struct 
smu_context *smu,
return ret;
}
 
+   if (vclk_min && vclk_max) {
+   ret = smu_v13_0_4_set_soft_freq_limited_range(smu,
+ SMU_VCLK,
+ vclk_min,
+ vclk_max);
+   if (ret)
+   return ret;
+   }
+
+   if (dclk_min && dclk_max) {
+   ret = smu_v13_0_4_set_soft_freq_limited_range(smu,
+ SMU_DCLK,
+ dclk_min,
+ dclk_max);
+   if (ret)
+   return ret;
+   }
return ret;
 }
 
-- 
2.34.1

RE: [PATCH] drm/amdgpu: display/Kconfig: replace leading spaces with tab

2023-06-07 Thread Chen, Guchun

[Public]

It's 
https://gitlab.freedesktop.org/agd5f/linux/-/tree/amd-staging-drm-next?ref_type=heads.
 The latest patches, including yours, will be pushed to this branch after a while.

Regards,
Guchun

> -Original Message-
> From: amd-gfx  On Behalf Of Sui
> Jingfeng
> Sent: Wednesday, June 7, 2023 2:34 PM
> To: Alex Deucher 
> Cc: Li, Sun peng (Leo) ; David Airlie
> ; Pan, Xinhui ; Siqueira, Rodrigo
> ; linux-ker...@vger.kernel.org; dri-
> de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Daniel Vetter
> ; Deucher, Alexander ;
> Wentland, Harry ; Koenig, Christian
> 
> Subject: Re: [PATCH] drm/amdgpu: display/Kconfig: replace leading spaces
> with tab
>
> https://cgit.freedesktop.org/amd/drm-amd/
>
>
> This one has a long time with no update.
>
>
> On 2023/6/7 14:31, Sui Jingfeng wrote:
> > Hi,
> >
> > On 2023/6/7 03:15, Alex Deucher wrote:
> >> Applied.  Thanks!
> >
> > Where is the official branch of drm/amdgpu, I can't find it on the
> > internet.
> >
> > Sorry for asking this silly question.
>
> >
> >> Alex
> >>
> >> On Tue, Jun 6, 2023 at 9:33 AM Sui Jingfeng 
> >> wrote:
> >>> This patch replace the leading spaces with tab, make them keep
> >>> aligned with the rest of the config options. No functional change.
> >>>
> >>> Signed-off-by: Sui Jingfeng 
> >>> ---
> >>>   drivers/gpu/drm/amd/display/Kconfig | 17 +++--
> >>>   1 file changed, 7 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/Kconfig
> >>> b/drivers/gpu/drm/amd/display/Kconfig
> >>> index 2d8e55e29637..04ccfc70d583 100644
> >>> --- a/drivers/gpu/drm/amd/display/Kconfig
> >>> +++ b/drivers/gpu/drm/amd/display/Kconfig
> >>> @@ -42,16 +42,13 @@ config DEBUG_KERNEL_DC
> >>>Choose this option if you want to hit kdgb_break in assert.
> >>>
> >>>   config DRM_AMD_SECURE_DISPLAY
> >>> -bool "Enable secure display support"
> >>> -depends on DEBUG_FS
> >>> -depends on DRM_AMD_DC_FP
> >>> -help
> >>> -Choose this option if you want to
> >>> -support secure display
> >>> -
> >>> -This option enables the calculation
> >>> -of crc of specific region via debugfs.
> >>> -Cooperate with specific DMCU FW.
> >>> +   bool "Enable secure display support"
> >>> +   depends on DEBUG_FS
> >>> +   depends on DRM_AMD_DC_FP
> >>> +   help
> >>> + Choose this option if you want to support secure display
> >>>
> >>> + This option enables the calculation of crc of specific
> >>> region via
> >>> + debugfs. Cooperate with specific DMCU FW.
> >>>
> >>>   endmenu
> >>> --
> >>> 2.25.1
> >>>
> --
> Jingfeng

RE: [PATCH 1/3] drm/amdgpu: add cached GPU fault structure to vm struct

2023-06-07 Thread Chen, Guchun

[Public]

3 nitpick comments.

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Friday, May 26, 2023 12:52 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ;
> samuel.pitoi...@gmail.com
> Subject: [PATCH 1/3] drm/amdgpu: add cached GPU fault structure to vm
> struct
>
> When we get a GPU pge fault, cache the fault for later analysis.

A spelling typo, s/pge/page

> Cc: samuel.pitoi...@gmail.com
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31
> ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++
>  2 files changed, 49 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 22f9a65ca0fc..73e022f3daa4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2631,3 +2631,34 @@ void amdgpu_debugfs_vm_bo_info(struct
> amdgpu_vm *vm, struct seq_file *m)
>  total_done_objs);
>  }
>  #endif
> +
> +/**
> + * amdgpu_vm_update_fault_cache - update cached fault into.
> + * @adev: amdgpu device pointer
> + * @pasid: PASID of the VM
> + * @addr: Address of the fault
> + * @status: fault status register

I guess this 'status' means the status from fault status register.

> + * @vmhub: which vmhub got the fault
> + *
> + * Cache the fault info for later use by userspace in debuggging.
A spelling typo, s/debuggging/debugging.

Regards,
Guchun
> + */
> +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
> +   unsigned int pasid,
> +   uint64_t addr,
> +   uint32_t status,
> +   unsigned int vmhub)
> +{
> + struct amdgpu_vm *vm;
> + unsigned long flags;
> +
> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +
> + vm = xa_load(&adev->vm_manager.pasids, pasid);
> + if (vm) {
> + vm->fault_info.addr = addr;
> + vm->fault_info.status = status;
> + vm->fault_info.vmhub = vmhub;
> + }
> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); }
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 14f9a2bf3acb..fb66a413110c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -244,6 +244,15 @@ struct amdgpu_vm_update_funcs {
> struct dma_fence **fence);
>  };
>
> +struct amdgpu_vm_fault_info {
> + /* fault address */
> + uint64_t addr;
> + /* fault status register */
> + uint32_t status;
> + /* which vmhub? gfxhub, mmhub, etc. */
> + unsigned int vmhub;
> +};
> +
>  struct amdgpu_vm {
>   /* tree of virtual addresses mapped */
>   struct rb_root_cached   va;
> @@ -332,6 +341,9 @@ struct amdgpu_vm {
>
>   /* Memory partition number, -1 means any partition */
>   int8_t  mem_id;
> +
> + /* cached fault info */
> + struct amdgpu_vm_fault_info fault_info;
>  };
>
>  struct amdgpu_vm_manager {
> @@ -540,4 +552,10 @@ static inline void
> amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
>   mutex_unlock(&vm->eviction_lock);
>  }
>
> +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
> +   unsigned int pasid,
> +   uint64_t addr,
> +   uint32_t status,
> +   unsigned int vmhub);
> +
>  #endif
> --
> 2.40.1

RE: [PATCH 0/3] Add GPU page fault query interface

2023-06-07 Thread Chen, Guchun

[Public]

With the 3 nitpicks fixed in patch 1, the series is: Acked-by: Guchun Chen 


Regards,
Guchun

> -Original Message-
> From: amd-gfx  On Behalf Of Alex
> Deucher
> Sent: Tuesday, June 6, 2023 11:40 PM
> To: Deucher, Alexander 
> Cc: samuel.pitoi...@gmail.com; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 0/3] Add GPU page fault query interface
>
> Ping on this series?
>
> Alex
>
> On Thu, May 25, 2023 at 12:52 PM Alex Deucher
>  wrote:
> >
> > This patch set adds support for an application to query GPU page
> > faults.  It's useful for debugging and there are vulkan extensions
> > that could make use of this.  Preliminary user space code which uses
> > this can be found here:
> > https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
> > https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298
> >
> > Note, that I made a small change to the vmhub definition to decouple
> > it from how the kernel tracks vmhubs so that we have a consistent user
> > view even if we decide to add more vmhubs like we recently did for gfx
> > 9.4.3.
> >
> > I've also pushed the changes to:
> > https://gitlab.freedesktop.org/agd5f/linux/-/commits/gpu_fault_info_io
> > ctl
> >
> >
> > Alex Deucher (3):
> >   drm/amdgpu: add cached GPU fault structure to vm struct
> >   drm/amdgpu: cache gpuvm fault information for gmc7+
> >   drm/amdgpu: add new INFO ioctl query for the last GPU page fault
> >
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 +-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 45
> +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 31 +++--
> > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  3 ++
> > drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  |  3 ++
> >  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |  3 ++
> >  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  3 ++
> >  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 11 +++---
> >  include/uapi/drm/amdgpu_drm.h   | 16 +
> >  10 files changed, 126 insertions(+), 8 deletions(-)
> >
> > --
> > 2.40.1
> >

Re: [PATCH v5 04/13] drm/connector: Use common colorspace_names array

2023-06-07 Thread Simon Ser



On Tuesday, June 6th, 2023 at 22:25, Harry Wentland  
wrote:

> We an use bitfields to track the support ones for HDMI

Typo: "We can"

Re: [PATCH v5 06/13] drm/connector: Allow drivers to pass list of supported colorspaces

2023-06-07 Thread Simon Ser

On Tuesday, June 6th, 2023 at 22:26, Harry Wentland  
wrote:

> -int drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector)
> +int drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector,
> +  u32 supported_colorspaces)
>  {
> - return drm_mode_create_colorspace_property(connector, hdmi_colorspaces);
> + u32 colorspaces = supported_colorspaces & hdmi_colorspaces;

This creates a potentially weird situation where the driver passes a
non-0 supported_colorspaces, but the intersection with hdmi_colorspaces
ends up being empty, and all colorspaces end up being advertised.

Re: PROBLEM: AMD Ryzen 9 7950X iGPU - Blinking Issue

2023-06-07 Thread Felix Richter

Hi Guys,

so I checked, the kernel I am running has this commit
(https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=08da182175db4c7f80850354849d95f2670e8cd9)
applied already!

https://github.com/ju6ge/linux/commit/917680e6056aa288cac288d3afd2745d372beb61

And the bug of display flickering persists with or without the
amdgpu.sg_display=0 variable applied!

Kind regards,
Felix Richter

On 6/5/23 16:11, Alex Deucher wrote:

+ Hamza
This is a known issue. You can workaround it by setting
amdgpu.sg_display=0. The issue should be fixed in:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=08da182175db4c7f80850354849d95f2670e8cd9

Alex

Now if this is the desired long term fix I do not know …

Kind regards,
Felix Richter

On 02.05.23 16:12, Linux regression tracking (Thorsten Leemhuis) wrote:

On 02.05.23 15:48, Felix Richter wrote:

On 5/2/23 15:34, Linux regression tracking (Thorsten Leemhuis) wrote:

On 02.05.23 15:13, Alex Deucher wrote:

On Tue, May 2, 2023 at 7:45 AM Linux regression tracking (Thorsten
Leemhuis) wrote:

On 30.04.23 13:44, Felix Richter wrote:

Hi,

I am running into an issue with the integrated GPU of the Ryzen 9
7950X. It seems to be a regression from kernel version 6.1 to 6.2.
The bug materializes in the form of my monitor blinking, meaning it
turns full white shortly. This happens very often so that the
system becomes unpleasant to use.

I am running the Archlinux Kernel:
The Issue happens on the bleeding edge kernel: 6.2.13
Switching back to the LTS kernel resolves the issue: 6.1.26

I have two monitors attached to the system. One 42 inch 4k Display
and a 24 inch 1080p Display and am running sway as my desktop.

Let me know if there is more information I could provide to help
narrow down the issue.

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced v6.1..v6.2
#regzbot title drm: amdgpu: system becomes unpleasant to use after
monitor starts blinking and turns full white
#regzbot ignore-activity

This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify
when
the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply and tell me -- ideally
while also telling regzbot about it, as explained by the page listed in
the footer of this mail.

Developers: When fixing the issue, remember to add 'Link:' tags
pointing
to the report (the parent of this mail). See page linked in footer for
details.

This sounds exactly like the issue that was fixed in this patch which
is already on its way to Linus:
https://gitlab.freedesktop.org/agd5f/linux/-/commit/08da182175db4c7f80850354849d95f2670e8cd9

FWIW, you in the flood of emails likely missed that this is the same
thread where you yesterday replied "If the module parameter didn't help
then perhaps you are seeing some other issue. Can you bisect?". That's
why I decided to add this to the tracking. Or am I missing something
obvious here?

/me looks around again and can't see anything, but that doesn't have to
mean anything...

Felix, btw, this guide might help you with the bisection, even if it's
just for kernel compilation:

https://docs.kernel.org/next/admin-guide/quickly-build-trimmed-linux.html

And to indirectly reply to your mail from yesterday[1]. You might want
to ignore the arch linux kernel git repo and just do a bisection between
6.1 and the latest 6.2.y kernel using upstream repos; and if I were you
I'd also try 6.3 or even mainline before that, in case the issue was
fixed already.

[1]
https://lore.kernel.org/all/04749ee4-0728-92fe-bcb0-a7320279e...@felixrichter.tech/

Thanks for the pointers, I'll do a bisection on my desktop from 6.1 to
the newest commit.

FWIW, I wonder what you actually mean with "newest commit" here: a
bisection between 6.1 and mainline HEAD might be a waste of time, *if*
this is something that only happens in 6.2.y (say due to a broken or
incomplete backport)

That was the part I was mostly unsure about … where
to start from.

I was planning to use PKGBUILD scripts from arch to achieve the same
configuration as I would when installing
the package and just rewrite the script to use a local copy of the
source code instead of the repository.
That way I can just use the bisect command, rebuild the package and test
again.

In my experience trying to deal with Linux distro's package managers
creates more trouble than it's worth.

But I probably won't be able to finish it this week, since I am on
vacation starting tomorrow and will not have access to the computer in
question. I will be back next week, by that time the patch Alex is
talking about might
already be in mainline. So if that fixes it, I will notice and let you
know. If not I will do the bisection to figure out what the actual issue
is.

Enjoy

Re: [PATCH v5 04/13] drm/connector: Use common colorspace_names array

2023-06-07 Thread Simon Ser

On Tuesday, June 6th, 2023 at 22:25, Harry Wentland  
wrote:

> + if (supported_colorspaces != 0 && (colorspaces & BIT(i)) == 0)

This patch actually also introduces a change in behavior: passing no
colorspace will make the function advertise all colorspaces. I have a
hard time understanding how this can be useful: we want to either
advertise all DP colorspaces, or all HDMI colorspaces, but not both?

One way to fix this would be to handle the "zero means everything"
behavior in the specific DP/HDMI callers. But I wonder, is it really
worth the magic if we can expose a simple const variable with all
DP/HDMI colorspaces?

Re: [PATCH] drm/amdgpu: display/Kconfig: replace leading spaces with tab

2023-06-07 Thread Sui Jingfeng


Hi,

On 2023/6/7 17:09, Chen, Guchun wrote:

[Public]

It's 
https://gitlab.freedesktop.org/agd5f/linux/-/tree/amd-staging-drm-next?ref_type=heads.
 The latest patches, including yours, will be pushed to this branch after a while.


Now I know, thanks for your kind reply.


Regards,
Guchun


-Original Message-
From: amd-gfx  On Behalf Of Sui
Jingfeng
Sent: Wednesday, June 7, 2023 2:34 PM
To: Alex Deucher 
Cc: Li, Sun peng (Leo) ; David Airlie
; Pan, Xinhui ; Siqueira, Rodrigo
; linux-ker...@vger.kernel.org; dri-
de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Daniel Vetter
; Deucher, Alexander ;
Wentland, Harry ; Koenig, Christian

Subject: Re: [PATCH] drm/amdgpu: display/Kconfig: replace leading spaces
with tab

https://cgit.freedesktop.org/amd/drm-amd/


This one has a long time with no update.


On 2023/6/7 14:31, Sui Jingfeng wrote:

Hi,

On 2023/6/7 03:15, Alex Deucher wrote:

Applied.  Thanks!

Where is the official branch of drm/amdgpu, I can't find it on the
internet.

Sorry for asking this silly question.

Alex

On Tue, Jun 6, 2023 at 9:33 AM Sui Jingfeng 
wrote:

This patch replace the leading spaces with tab, make them keep
aligned with the rest of the config options. No functional change.

Signed-off-by: Sui Jingfeng 
---
   drivers/gpu/drm/amd/display/Kconfig | 17 +++--
   1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig
b/drivers/gpu/drm/amd/display/Kconfig
index 2d8e55e29637..04ccfc70d583 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -42,16 +42,13 @@ config DEBUG_KERNEL_DC
Choose this option if you want to hit kdgb_break in assert.

   config DRM_AMD_SECURE_DISPLAY
-bool "Enable secure display support"
-depends on DEBUG_FS
-depends on DRM_AMD_DC_FP
-help
-Choose this option if you want to
-support secure display
-
-This option enables the calculation
-of crc of specific region via debugfs.
-Cooperate with specific DMCU FW.
+   bool "Enable secure display support"
+   depends on DEBUG_FS
+   depends on DRM_AMD_DC_FP
+   help
+ Choose this option if you want to support secure display

+ This option enables the calculation of crc of specific
region via
+ debugfs. Cooperate with specific DMCU FW.

   endmenu
--
2.25.1


--
Jingfeng


--
Jingfeng

[PATCH v2 06/07] drm/amdgpu: add option params to enforce process isolation between graphics and compute

2023-06-07 Thread Chong Li

enforce process isolation between graphics and compute via using the same 
reserved vmid.

v2: remove params "struct amdgpu_vm *vm" from
amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved.

Signed-off-by: Chong Li 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 22 +-
 5 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ce196badf42d..ef098a7287d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -215,6 +215,7 @@ extern int amdgpu_force_asic_type;
 extern int amdgpu_smartshift_bias;
 extern int amdgpu_use_xgmi_p2p;
 extern int amdgpu_mtype_local;
+extern bool enforce_isolation;
 #ifdef CONFIG_HSA_AMD
 extern int sched_policy;
 extern bool debug_evictions;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3d91e123f9bd..fdb6fb8229ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu;
 char *amdgpu_virtual_display;
-
+bool enforce_isolation;
 /*
  * OverDrive(bit 14) disabled by default
  * GFX DCS(bit 19) disabled by default
@@ -973,6 +973,14 @@ MODULE_PARM_DESC(
4 = 
AMDGPU_CPX_PARTITION_MODE)");
 module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
 
+
+/**
+ * DOC: enforce_isolation (bool)
+ * enforce process isolation between graphics and compute via using the same 
reserved vmid.
+ */
+module_param(enforce_isolation, bool, 0444);
+MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between 
graphics and compute . enforce_isolation = on");
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index c991ca0b7a1c..ff1ea99292fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
if (r || !idle)
goto error;
 
-   if (vm->reserved_vmid[vmhub]) {
+   if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == 
AMDGPU_GFXHUB(0)))) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
@@ -460,14 +460,11 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
 }
 
 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
-  struct amdgpu_vm *vm,
   unsigned vmhub)
 {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 
mutex_lock(&id_mgr->lock);
-   if (vm->reserved_vmid[vmhub])
-   goto unlock;
 
++id_mgr->reserved_use_count;
if (!id_mgr->reserved) {
@@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
list_del_init(&id->list);
id_mgr->reserved = id;
}
-   vm->reserved_vmid[vmhub] = true;
 
-unlock:
mutex_unlock(&id_mgr->lock);
return 0;
 }
 
 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
-  struct amdgpu_vm *vm,
   unsigned vmhub)
 {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 
mutex_lock(&id_mgr->lock);
-   if (vm->reserved_vmid[vmhub] &&
-   !--id_mgr->reserved_use_count) {
+   if (!--id_mgr->reserved_use_count) {
/* give the reserved ID back to normal round robin */
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
id_mgr->reserved = NULL;
}
-   vm->reserved_vmid[vmhub] = false;
+
mutex_unlock(&id_mgr->lock);
 }
 
@@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
+   /* alloc a default reserved vmid to enforce isolation */
+   if (enforce_isolation)
+   amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index d1cc09b45da4..68add23dc87c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *ade

[PATCH] drm/amd/pm: enable more Pstates profile levels for yellow_carp

2023-06-07 Thread shikaguo

This patch enables the following UMD stable Pstate profile levels for the
power_dpm_force_performance_level interface.

- profile_peak
- profile_min_mclk
- profile_min_sclk
- profile_standard

Signed-off-by: shikaguo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 94 ++-
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |  8 +-
 2 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index a92da336ecec..5c968ab2ea8d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -957,6 +957,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
uint32_t max)
 {
enum smu_message_type msg_set_min, msg_set_max;
+   uint32_t min_clk = min;
+   uint32_t max_clk = max;
+
int ret = 0;
 
if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type))
@@ -985,11 +988,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
return -EINVAL;
}
 
-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL);
+   if (clk_type == SMU_VCLK) {
+   min_clk = min << SMU_13_VCLK_SHIFT;
+   max_clk = max << SMU_13_VCLK_SHIFT;
+   }
+
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL);
+
if (ret)
goto out;
 
-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL);
if (ret)
goto out;
 
@@ -1107,6 +1116,50 @@ static int yellow_carp_force_clk_levels(struct 
smu_context *smu,
return ret;
 }
 
+static int yellow_carp_get_dpm_profile_freq(struct smu_context *smu,
+   enum amd_dpm_forced_level level,
+   enum smu_clk_type clk_type,
+   uint32_t *min_clk,
+   uint32_t *max_clk)
+{
+   int ret = 0;
+   uint32_t clk_limit = 0;
+
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_GFXCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
&clk_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, 
&clk_limit, NULL);
+   break;
+   case SMU_SOCCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_SOCCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SOCCLK, 
NULL, &clk_limit);
+   break;
+   case SMU_FCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_FCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
&clk_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, 
&clk_limit, NULL);
+   break;
+   case SMU_VCLK:
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, 
&clk_limit);
+   break;
+   case SMU_DCLK:
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, 
&clk_limit);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   *min_clk = *max_clk = clk_limit;
+   return ret;
+}
+
 static int yellow_carp_set_performance_level(struct smu_context *smu,
enum amd_dpm_forced_level level)
 {
@@ -1114,6 +1167,9 @@ static int yellow_carp_set_performance_level(struct 
smu_context *smu,
uint32_t sclk_min = 0, sclk_max = 0;
uint32_t fclk_min = 0, fclk_max = 0;
uint32_t socclk_min = 0, socclk_max = 0;
+   uint32_t vclk_min = 0, vclk_max = 0;
+   uint32_t dclk_min = 0, dclk_max = 0;
+
int ret = 0;
 
switch (level) {
@@ -1121,28 +1177,42 @@ static int yellow_carp_set_performance_level(struct 
smu_context *smu,
yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
&sclk_max);
yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
&fclk_max);
yellow_carp_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, 
&socclk_max);
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, 
&vclk_max);
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, 
&dclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
+

Re: [PATCH v2 06/07] drm/amdgpu: add option params to enforce process isolation between graphics and compute

2023-06-07 Thread Christian König


Am 07.06.23 um 12:57 schrieb Chong Li:

Enforce process isolation between graphics and compute by using the same
reserved VMID.

v2: remove params "struct amdgpu_vm *vm" from
 amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved.

Signed-off-by: Chong Li 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 22 +-
  5 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ce196badf42d..ef098a7287d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -215,6 +215,7 @@ extern int amdgpu_force_asic_type;
  extern int amdgpu_smartshift_bias;
  extern int amdgpu_use_xgmi_p2p;
  extern int amdgpu_mtype_local;
+extern bool enforce_isolation;
  #ifdef CONFIG_HSA_AMD
  extern int sched_policy;
  extern bool debug_evictions;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3d91e123f9bd..fdb6fb8229ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
  uint amdgpu_sdma_phase_quantum = 32;
  char *amdgpu_disable_cu;
  char *amdgpu_virtual_display;
-
+bool enforce_isolation;
  /*
   * OverDrive(bit 14) disabled by default
   * GFX DCS(bit 19) disabled by default
@@ -973,6 +973,14 @@ MODULE_PARM_DESC(
4 = 
AMDGPU_CPX_PARTITION_MODE)");
  module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
  
+

+/**
+ * DOC: enforce_isolation (bool)
+ * enforce process isolation between graphics and compute via using the same 
reserved vmid.
+ */
+module_param(enforce_isolation, bool, 0444);
+MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and 
compute . enforce_isolation = on");
+
  /* These devices are not supported by amdgpu.
   * They are supported by the mach64, r128, radeon drivers
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index c991ca0b7a1c..ff1ea99292fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
if (r || !idle)
goto error;
  
-	if (vm->reserved_vmid[vmhub]) {

+   if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == 
AMDGPU_GFXHUB(0)))) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
@@ -460,14 +460,11 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
  }
  
  int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,

-  struct amdgpu_vm *vm,
   unsigned vmhub)
  {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
  
  	mutex_lock(&id_mgr->lock);

-   if (vm->reserved_vmid[vmhub])
-   goto unlock;
  
  	++id_mgr->reserved_use_count;

if (!id_mgr->reserved) {
@@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
list_del_init(&id->list);
id_mgr->reserved = id;
}
-   vm->reserved_vmid[vmhub] = true;
  
-unlock:

mutex_unlock(&id_mgr->lock);
return 0;
  }
  
  void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,

-  struct amdgpu_vm *vm,
   unsigned vmhub)
  {
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
  
  	mutex_lock(&id_mgr->lock);

-   if (vm->reserved_vmid[vmhub] &&
-   !--id_mgr->reserved_use_count) {
+   if (!--id_mgr->reserved_use_count) {
/* give the reserved ID back to normal round robin */
list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
id_mgr->reserved = NULL;
}
-   vm->reserved_vmid[vmhub] = false;
+
mutex_unlock(&id_mgr->lock);
  }
  
@@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)

list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
+   /* alloc a default reserved vmid to enforce isolation */
+   if (enforce_isolation)
+   amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+
  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index d1cc09b45da4..68add23dc87c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -79,11 +79,9 @@ void

95 matches

Mail list logo