[PATCH v3] drm/amdgpu: Disable dpm_enabled flag while VF is in reset

2024-08-12 Thread Victor Skvortsov
VFs do not perform HW fini/suspend in FLR, so the dpm_enabled
is incorrectly kept enabled. Add interface to disable it in
virt_pre_reset call.

v2: Made implementation generic for all asics
v3: Re-order conditionals so PP_MP1_STATE_FLR is only evaluated on VF

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 1 +
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c| 6 +-
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 29a4adee9286..a6b8d0ba4758 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
 
-   if (amdgpu_sriov_vf(adev)) {
-   /* stop the data exchange thread */
-   amdgpu_virt_fini_data_exchange(adev);
-   }
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_pre_reset(adev);
 
amdgpu_fence_driver_isr_toggle(adev, true);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b287a82e6177..b6397d3229e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -33,6 +33,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
 #include "vi.h"
 #include "soc15.h"
 #include "nv.h"
@@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode 
amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
 }
 
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+   /* stop the data exchange thread */
+   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
 void amdgpu_virt_post_reset(struct amdgpu_device *adev)
 {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b42a8854dca0..b650a2032c42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
  u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
 void amdgpu_virt_post_reset(struct amdgpu_device *adev);
 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
 bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 4b20e2274313..19a48d98830a 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -218,6 +218,7 @@ enum pp_mp1_state {
PP_MP1_STATE_SHUTDOWN,
PP_MP1_STATE_UNLOAD,
PP_MP1_STATE_RESET,
+   PP_MP1_STATE_FLR,
 };
 
 enum pp_df_cstate {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 8b7d6ed7e2ed..9dc82f4d7c93 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -168,7 +168,11 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
int ret = 0;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
-   if (pp_funcs && pp_funcs->set_mp1_state) {
+   if (mp1_state == PP_MP1_STATE_FLR) {
+   /* VF lost access to SMU */
+   if (amdgpu_sriov_vf(adev))
+   adev->pm.dpm_enabled = false;
+   } else if (pp_funcs && pp_funcs->set_mp1_state) {
mutex_lock(&adev->pm.mutex);
 
ret = pp_funcs->set_mp1_state(
-- 
2.34.1



[PATCH v2] drm/amdgpu: Disable dpm_enabled flag while VF is in reset

2024-08-12 Thread Victor Skvortsov
VFs do not perform HW fini/suspend in FLR, so the dpm_enabled
is incorrectly kept enabled. Add interface to disable it in
virt_pre_reset call.

v2: Made implementation generic for all asics

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 1 +
 drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c| 5 -
 5 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 29a4adee9286..a6b8d0ba4758 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5289,10 +5289,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
 
-   if (amdgpu_sriov_vf(adev)) {
-   /* stop the data exchange thread */
-   amdgpu_virt_fini_data_exchange(adev);
-   }
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_pre_reset(adev);
 
amdgpu_fence_driver_isr_toggle(adev, true);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b287a82e6177..b6397d3229e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -33,6 +33,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
 #include "vi.h"
 #include "soc15.h"
 #include "nv.h"
@@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode 
amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
 }
 
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+   /* stop the data exchange thread */
+   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
 void amdgpu_virt_post_reset(struct amdgpu_device *adev)
 {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b42a8854dca0..b650a2032c42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
  u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
 void amdgpu_virt_post_reset(struct amdgpu_device *adev);
 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
 bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 4b20e2274313..19a48d98830a 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -218,6 +218,7 @@ enum pp_mp1_state {
PP_MP1_STATE_SHUTDOWN,
PP_MP1_STATE_UNLOAD,
PP_MP1_STATE_RESET,
+   PP_MP1_STATE_FLR,
 };
 
 enum pp_df_cstate {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 8b7d6ed7e2ed..af39206a2c5f 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -168,7 +168,10 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
int ret = 0;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
-   if (pp_funcs && pp_funcs->set_mp1_state) {
+   if (amdgpu_sriov_vf(adev) && mp1_state == PP_MP1_STATE_FLR) {
+   /* VF lost access to SMU */
+   adev->pm.dpm_enabled = false;
+   } else if (pp_funcs && pp_funcs->set_mp1_state) {
mutex_lock(&adev->pm.mutex);
 
ret = pp_funcs->set_mp1_state(
-- 
2.34.1



[PATCH] drm/amdgpu: Block MMR_READ IOCTL in reset

2024-08-08 Thread Victor Skvortsov
Register access from userspace should be blocked until
reset is complete.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 44 ++---
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 260cd0ad286d..038b400be437 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -778,6 +778,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
+   int ret = 0;
unsigned int n, alloc_size;
uint32_t *regs;
unsigned int se_num = (info->read_mmr_reg.instance >>
@@ -787,24 +788,39 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
   AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
  AMDGPU_INFO_MMR_SH_INDEX_MASK;
 
+   if (!down_read_trylock(&adev->reset_domain->sem))
+   return -ENOENT;
+
/* set full masks if the userspace set all bits
 * in the bitfields
 */
-   if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+   if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
se_num = 0x;
-   else if (se_num >= AMDGPU_GFX_MAX_SE)
-   return -EINVAL;
-   if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+   }
+   else if (se_num >= AMDGPU_GFX_MAX_SE) {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
sh_num = 0x;
-   else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
-   return -EINVAL;
+   }
+   else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
+   ret = -EINVAL;
+   goto out;
+   }
 
-   if (info->read_mmr_reg.count > 128)
-   return -EINVAL;
+   if (info->read_mmr_reg.count > 128) {
+   ret = -EINVAL;
+   goto out;
+   }
 
regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), 
GFP_KERNEL);
-   if (!regs)
-   return -ENOMEM;
+   if (!regs) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
 
amdgpu_gfx_off_ctrl(adev, false);
@@ -816,13 +832,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
  info->read_mmr_reg.dword_offset + 
i);
kfree(regs);
amdgpu_gfx_off_ctrl(adev, true);
-   return -EFAULT;
+   ret = -EFAULT;
+   goto out;
}
}
amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
-   return n ? -EFAULT : 0;
+   ret = (n ? -EFAULT : 0);
+out:
+   up_read(&adev->reset_domain->sem);
+   return ret;
}
case AMDGPU_INFO_DEV_INFO: {
struct drm_amdgpu_info_device *dev_info;
-- 
2.34.1



[PATCH] drm/amdgpu: Disable dpm_enabled flag while VF is in reset

2024-08-08 Thread Victor Skvortsov
VFs do not perform HW fini/suspend in FLR, so the dpm_enabled
is incorrectly kept enabled. Add interface to disable it in
virt_pre_reset call.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 10 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  1 +
 .../gpu/drm/amd/include/kgd_pp_interface.h|  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 21 +++
 5 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 730dae77570c..1be5699f4190 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5288,10 +5288,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
 
-   if (amdgpu_sriov_vf(adev)) {
-   /* stop the data exchange thread */
-   amdgpu_virt_fini_data_exchange(adev);
-   }
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_pre_reset(adev);
 
amdgpu_fence_driver_isr_toggle(adev, true);
 
@@ -5561,6 +5559,10 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
 
 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
 {
+   if (amdgpu_sriov_vf(adev)) {
+   adev->mp1_state = PP_MP1_STATE_FLR;
+   return;
+   }
 
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 111c380f929b..456a685c3975 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -33,6 +33,7 @@
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
 #include "vi.h"
 #include "soc15.h"
 #include "nv.h"
@@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode 
amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
 }
 
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+   /* stop the data exchange thread */
+   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
 void amdgpu_virt_post_reset(struct amdgpu_device *adev)
 {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b42a8854dca0..b650a2032c42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
  u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
 void amdgpu_virt_post_reset(struct amdgpu_device *adev);
 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
 bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 4b20e2274313..19a48d98830a 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -218,6 +218,7 @@ enum pp_mp1_state {
PP_MP1_STATE_SHUTDOWN,
PP_MP1_STATE_UNLOAD,
PP_MP1_STATE_RESET,
+   PP_MP1_STATE_FLR,
 };
 
 enum pp_df_cstate {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 78c3f94bb3ff..b85478b1eaa7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2638,6 +2638,26 @@ static int smu_v13_0_6_send_rma_reason(struct 
smu_context *smu)
return ret;
 }
 
+static int smu_v13_0_6_set_mp1_state(struct smu_context *smu,
+   enum pp_mp1_state mp1_state)
+{
+   int ret =0;
+
+   switch (mp1_state) {
+   case PP_MP1_STATE_FLR:
+   /* VF lost access to SMU */
+   smu->adev->pm.dpm_enabled = false;
+   ret = 0;
+   break;
+   default:
+   /* Ignore others */
+   ret = 0;
+   }
+
+   return ret;
+}
+
+
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
struct smu_context *smu = adev->powerplay.pp_handle;
@@ -3283,6 +3303,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = 
{
.i2c_fini = smu_v13_0_6_i2c_control_fini,
.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
.send_rma_reason = smu_

[PATCH v2 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset

2024-08-06 Thread Victor Skvortsov
Stop waiting for the KIQ to return back when there is a reset pending.
It's quite likely that the KIQ will never response.

Signed-off-by: Koenig Christian 
Suggested-by: Lazar Lijo 
Tested-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..8962be257942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device 
*adev,
goto failed_kiq;
 
might_sleep();
-   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
+   !amdgpu_reset_pending(adev->reset_domain)) {
 
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb5..f33a4e0ffba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct 
amdgpu_reset_domain *doma
return queue_work(domain->wq, work);
 }
 
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
+   lockdep_assert_held(&domain->sem);
+   return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain 
*reset_domain);
-- 
2.34.1



[PATCH v2 1/2] Revert "drm/amdgpu: Extend KIQ reg polling wait for VF"

2024-08-06 Thread Victor Skvortsov
KIQ timeouts no longer seen.

This reverts commit b4d12cc00ad69e8a0dea2ece7202bacfd8b894fb.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 137a88b8de45..206360503136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -347,9 +347,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
 };
-#define SRIOV_USEC_TIMEOUT 120 /* wait 12 * 100ms for SRIOV */
-#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 5 : 5000) /* in usecs, 
extend for VF */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define SRIOV_USEC_TIMEOUT  120 /* wait 12 * 100ms for SRIOV */
+#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
-- 
2.34.1



[PATCH 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset

2024-08-02 Thread Victor Skvortsov
Stop waiting for the KIQ to return back when there is a reset pending.
It's quite likely that the KIQ will never response.

Signed-off-by: Victor Skvortsov 
Suggested-by: Lazar Lijo 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..8962be257942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device 
*adev,
goto failed_kiq;
 
might_sleep();
-   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+   while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
+   !amdgpu_reset_pending(adev->reset_domain)) {
 
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb5..f33a4e0ffba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct 
amdgpu_reset_domain *doma
return queue_work(domain->wq, work);
 }
 
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
+   lockdep_assert_held(&domain->sem);
+   return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain 
*reset_domain);
-- 
2.34.1



[PATCH 1/2] Revert "drm/amdgpu: Extend KIQ reg polling wait for VF"

2024-08-02 Thread Victor Skvortsov
KIQ timeouts no longer seen.

This reverts commit b4d12cc00ad69e8a0dea2ece7202bacfd8b894fb.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 137a88b8de45..206360503136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -347,9 +347,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
 };
-#define SRIOV_USEC_TIMEOUT 120 /* wait 12 * 100ms for SRIOV */
-#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 5 : 5000) /* in usecs, 
extend for VF */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define SRIOV_USEC_TIMEOUT  120 /* wait 12 * 100ms for SRIOV */
+#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
-- 
2.34.1



[PATCH] drm/amdgpu: Add lock around VF RLCG interface

2024-05-27 Thread Victor Skvortsov
flush_gpu_tlb may be called from another thread while
device_gpu_recover is running.

Both of these threads access registers through the VF
RLCG interface during VF Full Access. Add a lock around this interface
to prevent race conditions between these threads.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f5168b4c3b03..6711836054f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4049,6 +4049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
+   mutex_init(&adev->virt.rlcg_reg_lock);
hash_init(adev->mn_hash);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3d5f58e76f2d..a72683f83390 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -982,6 +982,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 
offset, u32 v, u32 f
scratch_reg1 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg1;
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg2;
scratch_reg3 = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->scratch_reg3;
+
+   mutex_lock(&adev->virt.rlcg_reg_lock);
+
if (reg_access_ctrl->spare_int)
spare_int = (void __iomem *)adev->rmmio + 4 * 
reg_access_ctrl->spare_int;
 
@@ -1038,6 +1041,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
}
 
ret = readl(scratch_reg0);
+
+   mutex_unlock(&adev->virt.rlcg_reg_lock);
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 642f1fd287d8..0ec246c74570 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -272,6 +272,8 @@ struct amdgpu_virt {
 
/* the ucode id to signal the autoload */
uint32_t autoload_ucode_id;
+
+   struct mutex rlcg_reg_lock;
 };
 
 struct amdgpu_video_codec_info;
-- 
2.34.1



[PATCH 2/2] drm/amdgpu: Queue KFD reset workitem in VF FED

2024-05-19 Thread Victor Skvortsov
The guest recovery sequence is buggy in Fatal Error when both
FLR & KFD reset workitems are queued at the same time. In addition,
FLR guest recovery sequence is out of order when PF/VF communication
breaks due to a GPU fatal error

As a temporary work around, perform a KFD style reset (Initiate reset
request from the guest) inside the pf2vf thread on FED.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index d98d619fba97..3d5f58e76f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -602,7 +602,7 @@ static void amdgpu_virt_update_vf2pf_work_item(struct 
work_struct *work)
amdgpu_sriov_runtime(adev)) {
amdgpu_ras_set_fed(adev, true);
if (amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work))
+ 
&adev->kfd.reset_work))
return;
else
dev_err(adev->dev, "Failed to queue work! at 
%s", __func__);
-- 
2.34.1



[PATCH 1/2] drm/amdgpu: Extend KIQ reg polling wait for VF

2024-05-19 Thread Victor Skvortsov
Runtime KIQ interface to read/write registers in VF may take longer
than expected for BM environment. Extend the timeout.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d749c6abdc5e..e8980b6009c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -348,9 +348,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
 };
-#define SRIOV_USEC_TIMEOUT  120 /* wait 12 * 100ms for SRIOV */
-#define MAX_KIQ_REG_WAIT   5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define SRIOV_USEC_TIMEOUT 120 /* wait 12 * 100ms for SRIOV */
+#define MAX_KIQ_REG_WAIT amdgpu_sriov_vf(adev) ? 5 : 5000 /* in usecs, 
extend for VF */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
-- 
2.34.1



[PATCH] drm/amdgpu: Remove wrong fini_data_exchange call

2024-05-17 Thread Victor Skvortsov
This call is already done inside amdgpu_device_pre_asic_reset.
If should_recover_gpu is false, then vf2pf worker thread is
permanently killed.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 --
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f4c47492e0cd..8b07328a4b7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -264,8 +264,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 
down_write(&adev->reset_domain->sem);
 
-   amdgpu_virt_fini_data_exchange(adev);
-
xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
do {
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 37b49a5ed2a1..8a2f2feb5130 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -297,8 +297,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
 
down_write(&adev->reset_domain->sem);
 
-   amdgpu_virt_fini_data_exchange(adev);
-
xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
do {
-- 
2.34.1



[PATCH] drm/amdgpu: Skip virt_exchange_init on SDMA poison consumption

2024-03-12 Thread Victor Skvortsov
From: Victor Skvortsov 

Host will initiate an FLR in SDMA poison consumption scenario.
Guest should wait for FLR message to re-init data exchange.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 77f5b55decf9..a1bad772d932 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -444,7 +444,8 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device 
*adev,
amdgpu_virt_fini_data_exchange(adev);
xgpu_nv_send_access_requests_with_param(adev,
IDH_RAS_POISON, block, 0, 0);
-   amdgpu_virt_init_data_exchange(adev);
+   if (block != AMDGPU_RAS_BLOCK__SDMA)
+   amdgpu_virt_init_data_exchange(adev);
}
 }
 
-- 
2.25.1



[PATCH] drm/amdgpu/: Remove bo_create_kernel_at path from virt page

2024-03-12 Thread Victor Skvortsov
Use amdgpu_vram_mgr to reserve bad page ranges.
Reserved ranges will be freed by amdgpu_vram_mgr_fini()
Delete bo_create path as it is redundant.

Suggested-by: Christian König 
Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 -
 2 files changed, 3 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7a4eae36778a..2a20714b9c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -244,7 +244,6 @@ static int amdgpu_virt_init_ras_err_handler_data(struct 
amdgpu_device *adev)
 */
unsigned int align_space = 512;
void *bps = NULL;
-   struct amdgpu_bo **bps_bo = NULL;
 
*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), 
GFP_KERNEL);
if (!*data)
@@ -254,12 +253,7 @@ static int amdgpu_virt_init_ras_err_handler_data(struct 
amdgpu_device *adev)
if (!bps)
goto bps_failure;
 
-   bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), 
GFP_KERNEL);
-   if (!bps_bo)
-   goto bps_bo_failure;
-
(*data)->bps = bps;
-   (*data)->bps_bo = bps_bo;
(*data)->count = 0;
(*data)->last_reserved = 0;
 
@@ -267,34 +261,12 @@ static int amdgpu_virt_init_ras_err_handler_data(struct 
amdgpu_device *adev)
 
return 0;
 
-bps_bo_failure:
-   kfree(bps);
 bps_failure:
kfree(*data);
 data_failure:
return -ENOMEM;
 }
 
-static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
-{
-   struct amdgpu_virt *virt = &adev->virt;
-   struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
-   struct amdgpu_bo *bo;
-   int i;
-
-   if (!data)
-   return;
-
-   for (i = data->last_reserved - 1; i >= 0; i--) {
-   bo = data->bps_bo[i];
-   if (bo) {
-   amdgpu_bo_free_kernel(&bo, NULL, NULL);
-   data->bps_bo[i] = bo;
-   }
-   data->last_reserved = i;
-   }
-}
-
 void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
 {
struct amdgpu_virt *virt = &adev->virt;
@@ -305,10 +277,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct 
amdgpu_device *adev)
if (!data)
return;
 
-   amdgpu_virt_ras_release_bp(adev);
-
kfree(data->bps);
-   kfree(data->bps_bo);
kfree(data);
virt->virt_eh_data = NULL;
 }
@@ -330,9 +299,6 @@ static void amdgpu_virt_ras_reserve_bps(struct 
amdgpu_device *adev)
 {
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
-   struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
-   struct ttm_resource_manager *man = &mgr->manager;
-   struct amdgpu_bo *bo = NULL;
uint64_t bp;
int i;
 
@@ -341,26 +307,11 @@ static void amdgpu_virt_ras_reserve_bps(struct 
amdgpu_device *adev)
 
for (i = data->last_reserved; i < data->count; i++) {
bp = data->bps[i].retired_page;
+   if (amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+   bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE))
+   DRM_DEBUG("RAS WARN: reserve vram for retired page %llx 
fail\n", bp);
 
-   /* There are two cases of reserve error should be ignored:
-* 1) a ras bad page has been allocated (used by someone);
-* 2) a ras bad page has been reserved (duplicate error 
injection
-*for one page);
-*/
-   if  (ttm_resource_manager_used(man)) {
-   amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
-   bp << AMDGPU_GPU_PAGE_SHIFT,
-   AMDGPU_GPU_PAGE_SIZE);
-   data->bps_bo[i] = NULL;
-   } else {
-   if (amdgpu_bo_create_kernel_at(adev, bp << 
AMDGPU_GPU_PAGE_SHIFT,
-   AMDGPU_GPU_PAGE_SIZE,
-   &bo, NULL))
-   DRM_DEBUG("RAS WARN: reserve vram for retired 
page %llx fail\n", bp);
-   data->bps_bo[i] = bo;
-   }
data->last_reserved = i + 1;
-   bo = NULL;
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 3f59b7b5523f..15599951e7b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -224,8 +224,6 

[PATCH] drm/amdgpu: Fix wait for RLCG command completion

2022-02-03 Thread Victor Skvortsov
if (!(tmp & flag)) condition will always evaluate to true
when the flag is 0x0 (AMDGPU_RLCG_GC_WRITE). Instead check
that address bits are cleared to determine whether
the command is complete.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e1288901beb6..a8babe3bccb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -902,7 +902,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device 
*adev, u32 offset, u32 v
 
for (i = 0; i < timeout; i++) {
tmp = readl(scratch_reg1);
-   if (!(tmp & flag))
+   if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
break;
udelay(10);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 40803aab136f..68f592f0e992 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -43,6 +43,8 @@
 #define AMDGPU_RLCG_WRONG_OPERATION_TYPE   0x200
 #define AMDGPU_RLCG_REG_NOT_IN_RANGE   0x100
 
+#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK  0xF
+
 /* all asic after AI use this offset */
 #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
 /* tonga/fiji use this offset */
-- 
2.25.1



[PATCH v3 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-16 Thread Victor Skvortsov
Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

v2: Added a description for the scratch registers

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 117 --
 1 file changed, 89 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..9189fb85a4dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0
 
+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };
 
-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
static void *scratch_reg0;
static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
static void *spare_int;
static uint32_t grbm_cntl;
static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;
 
scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
 
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
 
-   if (amdgpu_sriov_runtime(adev)) {
-   pr_err("shouldn't call rlcg write register during runtime\n");
-   return;
-   }
-
if (offset == grbm_cntl || offset == grbm_idx) {
if (offset  == grbm_cntl)
writel(v, scratch_reg2);
@@ -771,41 +777,95 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 
writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
} else {
-   uint32_t i = 0;
-   uint32_t retries = 5;
-
+   /*
+* SCRATCH_REG0 = read/write value
+* SCRATCH_REG1[30:28]  = command
+* SCRATCH_REG1[19:0]   = address in dword
+* SCRATCH_REG1[26:24]  = Error reporting
+*/
writel(v, scratch_reg0);
-   writel(offset | 0x8000, scratch_reg1);
+   writel(offset | flag, scratch_reg1);
writel(1, spare_int);
-   for (i = 0; i < retries; i++) {
-   u32 tmp;
 
+   for (i = 0; i < retries; i++) {
tmp = readl(scratch_reg1);
-   if (!(tmp & 0x8000))
+   if (!(tmp & flag))
break;
 
udelay(10);
}
-   if (i >= retries)
-   pr_err("timeout: rlcg program reg:0x%05x failed !\n", 
offset);
+
+   if (i >= retries) {
+   if (amdgpu_sriov_reg_indirect_gc(adev)) {
+   if (tmp & GFX9_RLCG_VFGATE_DISABLED)
+   pr_err("The vfgate is disabled, program 
reg:0x%05x failed!\n", offset);
+   else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)
+   pr_err("Wrong operatio

[PATCH v3 4/5] drm/amdgpu: get xgmi info before ip_init

2021-12-16 Thread Victor Skvortsov
Driver needs to call get_xgmi_info() before ip_init
to determine whether it needs to handle a pending hive reset.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 --
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5bd785cfc5ca..4fd370016834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3576,6 +3576,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+   /* Need to get xgmi info early to decide the reset behavior*/
+   if (adev->gmc.xgmi.supported) {
+   r = adev->gfxhub.funcs->get_xgmi_info(adev);
+   if (r)
+   return r;
+   }
+
/* enable PCIE atomic ops */
if (amdgpu_sriov_vf(adev))
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info 
*)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ae46eb35b3d7..3d5d47a799e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -914,12 +914,6 @@ static int gmc_v10_0_sw_init(void *handle)
return r;
}
 
-   if (adev->gmc.xgmi.supported) {
-   r = adev->gfxhub.funcs->get_xgmi_info(adev);
-   if (r)
-   return r;
-   }
-
r = gmc_v10_0_mc_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2b86c63b032a..57f2729a7bd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1628,12 +1628,6 @@ static int gmc_v9_0_sw_init(void *handle)
}
adev->need_swiotlb = drm_need_swiotlb(44);
 
-   if (adev->gmc.xgmi.supported) {
-   r = adev->gfxhub.funcs->get_xgmi_info(adev);
-   if (r)
-   return r;
-   }
-
r = gmc_v9_0_mc_init(adev);
if (r)
return r;
-- 
2.25.1



[PATCH v3 3/5] drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

2021-12-16 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ddfe7aff919d..1abf662a0e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, 
uint32_t pipe_id)
 
lock_srbm(adev, mec, pipe, 0, 0);
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
@@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
   lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+   WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
   (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
 
@@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device 
*adev,
uint32_t low, high;
 
acquire_queue(adev, pipe_id, queue_id);
-   act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
 
-   if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-  high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+   if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+  high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
release_queue(adev);
@@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void 
*mqd,
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
-   temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
@@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device 
*adev,
mutex_lock(&adev->grbm_idx_mutex);
 
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+   WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
 
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int 
queue_idx,
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
-   reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+   reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
 queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
@@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
 
gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0x);
-   queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
-  mmSPI_CSQ_WF_ACTIVE_STATUS));
+   queue_map = RREG32_SOC15(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_STATUS);
 
/*
 * Assumption: queue map encodes following schema: four
@@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct 
amdgpu_device *adev,
/*
 * Program TBA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
 lower_32_bits(tba_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
 upper_32_bits(tba_addr >> 8));
 
/*
 * Program TMA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 

[PATCH v3 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov

2021-12-16 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the
indirect flag is set

v2: Replaced ternary operator with if-else for better
readability

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 57 ---
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a5471923b3f6..2b86c63b032a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -478,9 +478,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
tmp &= ~bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -489,9 +498,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
tmp |= bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -788,9 +806,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for 
GFXHUB as well. */
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
-   /* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   /* a read return value of 1 means semaphore acquire */
+   if (vmhub == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   else
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+
if (tmp & 0x1)
break;
udelay(1);
@@ -801,8 +822,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
do {
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   if (vmhub == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   else
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -815,8 +838,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  hub->eng_distance * eng);
 
for (j = 0; j < adev->usec_timeout; j++) {
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-   hub->eng_distance * eng);
+   if (vmhub == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   else
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+
if (tmp & (1 << vmid))
break;
udelay(1);
@@ 

[PATCH v3 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

2021-12-16 Thread Victor Skvortsov
Add helper macros to change register access
from direct to indirect.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..473767e03676 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -51,6 +51,8 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)
 
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 AMDGPU_REGS_NO_KIQ, ip##_HWIP)
@@ -65,6 +67,9 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)
 
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
-- 
2.25.1



[PATCH v3 0/5] *** GFX9 RLCG Interface modifications ***

2021-12-16 Thread Victor Skvortsov
This patchset introduces an expanded sriov RLCG interface.
This interface will be used by Aldebaran in sriov
mode for indirect GC register access during full access.

v2: Added descriptions to scratch registers, and improved code readability.

v3: Remove the RLC function pointer init change. Move xgmi_info call
to earlier in init.

Victor Skvortsov (5):
  drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions
  drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov
  drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov
  drm/amdgpu: get xgmi info before ip_init
  drm/amdgpu: Modify indirect register access for gfx9 sriov

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  27 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   7 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 117 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c|   6 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  63 +++---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |   5 +
 6 files changed, 157 insertions(+), 68 deletions(-)

-- 
2.25.1



[PATCH v2] drm/amdgpu: Separate vf2pf work item init from virt data exchange

2021-12-16 Thread Victor Skvortsov
We want to be able to call virt data exchange conditionally
after gmc sw init to reserve bad pages as early as possible.
Since this is a conditional call, we will need to call
it again unconditionally later in the init sequence.

Refactor the data exchange function so it can be
called multiple times without re-initializing the work item.

v2: Cleaned up the code. Kept the original call to init_exchange_data()
inside early init to initialize the work item, afterwards call
exchange_data() when needed.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 36 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  1 +
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 48aeca3b8f16..ddc67b900587 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2316,6 +2316,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
 
/* need to do gmc hw init early so we can allocate gpu mem */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+   /* Try to reserve bad pages early */
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_exchange_data(adev);
+
r = amdgpu_device_vram_scratch_init(adev);
if (r) {
DRM_ERROR("amdgpu_vram_scratch_init failed 
%d\n", r);
@@ -2347,7 +2351,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
 
if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_exchange_data(adev);
 
r = amdgpu_ib_pool_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3fc49823f527..f8e574cc0e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -622,17 +622,35 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device 
*adev)
 
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 {
-   uint64_t bp_block_offset = 0;
-   uint32_t bp_block_size = 0;
-   struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
-
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
 
-   if (adev->mman.fw_vram_usage_va != NULL) {
+   if (adev->bios != NULL) {
adev->virt.vf2pf_update_interval_ms = 2000;
 
+   adev->virt.fw_reserve.p_pf2vf =
+   (struct amd_sriov_msg_pf2vf_info_header *)
+   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+
+   amdgpu_virt_read_pf2vf_data(adev);
+   }
+
+   if (adev->virt.vf2pf_update_interval_ms != 0) {
+   INIT_DELAYED_WORK(&adev->virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
+   schedule_delayed_work(&(adev->virt.vf2pf_work), 
msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+   }
+}
+
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+   uint64_t bp_block_offset = 0;
+   uint32_t bp_block_size = 0;
+   struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+   if (adev->mman.fw_vram_usage_va != NULL) {
+
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->mman.fw_vram_usage_va + 
(AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
@@ -663,16 +681,10 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
 
amdgpu_virt_read_pf2vf_data(adev);
-
-   return;
-   }
-
-   if (adev->virt.vf2pf_update_interval_ms != 0) {
-   INIT_DELAYED_WORK(&adev->virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
-   schedule_delayed_work(&(adev->virt.vf2pf_work), 
adev->virt.vf2pf_update_interval_ms);
}
 }
 
+
 void amdgpu_detect_virtualization(struct amdgpu_device *adev)
 {
uint32_t reg;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 8d4c20bb71c5..9adfb8d63280 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -308,6 +308,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_v

[PATCH v2 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-15 Thread Victor Skvortsov
Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

v2: Added a description for the scratch registers

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 117 --
 1 file changed, 89 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d252b06efa43..7a754cb8236e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0
 
+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };
 
-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
static void *scratch_reg0;
static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
static void *spare_int;
static uint32_t grbm_cntl;
static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;
 
scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
 
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
 
-   if (amdgpu_sriov_runtime(adev)) {
-   pr_err("shouldn't call rlcg write register during runtime\n");
-   return;
-   }
-
if (offset == grbm_cntl || offset == grbm_idx) {
if (offset  == grbm_cntl)
writel(v, scratch_reg2);
@@ -771,41 +777,95 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 
writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
} else {
-   uint32_t i = 0;
-   uint32_t retries = 5;
-
+   /*
+* SCRATCH_REG0 = read/write value
+* SCRATCH_REG1[30:28]  = command
+* SCRATCH_REG1[19:0]   = address in dword
+* SCRATCH_REG1[26:24]  = Error reporting
+*/
writel(v, scratch_reg0);
-   writel(offset | 0x8000, scratch_reg1);
+   writel(offset | flag, scratch_reg1);
writel(1, spare_int);
-   for (i = 0; i < retries; i++) {
-   u32 tmp;
 
+   for (i = 0; i < retries; i++) {
tmp = readl(scratch_reg1);
-   if (!(tmp & 0x8000))
+   if (!(tmp & flag))
break;
 
udelay(10);
}
-   if (i >= retries)
-   pr_err("timeout: rlcg program reg:0x%05x failed !\n", 
offset);
+
+   if (i >= retries) {
+   if (amdgpu_sriov_reg_indirect_gc(adev)) {
+   if (tmp & GFX9_RLCG_VFGATE_DISABLED)
+   pr_err("The vfgate is disabled, program 
reg:0x%05x failed!\n", offset);
+   else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)
+   pr_err("Wrong operatio

[PATCH v2 4/5] drm/amdgpu: Initialize Aldebaran RLC function pointers

2021-12-15 Thread Victor Skvortsov
In SRIOV, RLC function pointers must be initialized early as
we rely on the RLCG interface for all GC register access.

v2: Make aldebaran a seperate case

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 65e1f6cc59dd..d7e1b503cd3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -842,8 +842,12 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(9, 3, 0):
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
+   amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+   break;
case IP_VERSION(9, 4, 2):
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+   if (amdgpu_sriov_vf(adev))
+   gfx_v9_0_set_rlc_funcs(adev);
break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..d252b06efa43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -816,7 +816,6 @@ static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, 
u32 offset,
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
@@ -7066,7 +7065,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device 
*adev)
adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
 }
 
-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..1817e252354f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -29,4 +29,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
   u32 instance);
 
+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
+
 #endif
-- 
2.25.1



[PATCH v2 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

2021-12-15 Thread Victor Skvortsov
Add helper macros to change register access
from direct to indirect.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..473767e03676 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -51,6 +51,8 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)
 
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 AMDGPU_REGS_NO_KIQ, ip##_HWIP)
@@ -65,6 +67,9 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)
 
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
-- 
2.25.1



[PATCH v2 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov

2021-12-15 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the
indirect flag is set

v2: Replaced ternary operator with if-else for better
readability

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 57 ---
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..e85a264c971a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -478,9 +478,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
tmp &= ~bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -489,9 +498,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
tmp |= bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -788,9 +806,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for 
GFXHUB as well. */
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
-   /* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   /* a read return value of 1 means semaphore acquire */
+   if (vmhub == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   else
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+
if (tmp & 0x1)
break;
udelay(1);
@@ -801,8 +822,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
do {
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   if (vmhub == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   else
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -815,8 +838,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  hub->eng_distance * eng);
 
for (j = 0; j < adev->usec_timeout; j++) {
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-   hub->eng_distance * eng);
+   if (vmhub == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+   else
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+
if (tmp & (1 << vmid))
break;
udelay(1);
@@ 

[PATCH v2 3/5] drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

2021-12-15 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ddfe7aff919d..1abf662a0e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, 
uint32_t pipe_id)
 
lock_srbm(adev, mec, pipe, 0, 0);
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
@@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
   lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+   WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
   (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
 
@@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device 
*adev,
uint32_t low, high;
 
acquire_queue(adev, pipe_id, queue_id);
-   act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
 
-   if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-  high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+   if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+  high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
release_queue(adev);
@@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void 
*mqd,
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
-   temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
@@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device 
*adev,
mutex_lock(&adev->grbm_idx_mutex);
 
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+   WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
 
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int 
queue_idx,
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
-   reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+   reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
 queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
@@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
 
gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0x);
-   queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
-  mmSPI_CSQ_WF_ACTIVE_STATUS));
+   queue_map = RREG32_SOC15(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_STATUS);
 
/*
 * Assumption: queue map encodes following schema: four
@@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct 
amdgpu_device *adev,
/*
 * Program TBA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
 lower_32_bits(tba_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
 upper_32_bits(tba_addr >> 8));
 
/*
 * Program TMA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 

[PATCH v2 0/5] *** GFX9 RLCG Interface modifications ***

2021-12-15 Thread Victor Skvortsov
This patchset introduces an expanded sriov RLCG interface.
This interface will be used by Aldebaran in sriov mode
for indirect GC register access during full access.

v2: Added descriptions to scratch registers, and 
improved code readability.

Victor Skvortsov (5):
  drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions
  drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov
  drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov
  drm/amdgpu: Initialize Aldebaran RLC function pointers
  drm/amdgpu: Modify indirect register access for gfx9 sriov

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  27 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c |   4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 120 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  57 +++--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |   5 +
 6 files changed, 157 insertions(+), 58 deletions(-)

-- 
2.25.1



[PATCH 4/5] drm/amdgpu: Initialize Aldebaran RLC function pointers

2021-12-15 Thread Victor Skvortsov
In SRIOV, RLC function pointers must be initialized early as
we rely on the RLCG interface for all GC register access.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 65e1f6cc59dd..1bc92a38d124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -844,6 +844,8 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+   if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2))
+   gfx_v9_0_set_rlc_funcs(adev);
break;
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..d252b06efa43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -816,7 +816,6 @@ static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, 
u32 offset,
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
@@ -7066,7 +7065,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device 
*adev)
adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
 }
 
-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..1817e252354f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -29,4 +29,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
   u32 instance);
 
+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
+
 #endif
-- 
2.25.1



[PATCH 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov

2021-12-15 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the
indirect flag is set

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 45 +++
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..345ce7fc6463 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -478,9 +478,16 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   tmp = (j == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP(GC, reg) :
+   RREG32_SOC15_IP(MMHUB, reg);
+
tmp &= ~bits;
-   WREG32(reg, tmp);
+
+   (j == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP(GC, reg, tmp) :
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -489,9 +496,16 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
hub = &adev->vmhub[j];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   tmp = (j == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP(GC, reg) :
+   RREG32_SOC15_IP(MMHUB, reg);
+
tmp |= bits;
-   WREG32(reg, tmp);
+
+   (j == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP(GC, reg, tmp) :
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
}
}
break;
@@ -789,8 +803,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   tmp = (vmhub == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng) :
+   RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
if (tmp & 0x1)
break;
udelay(1);
@@ -801,8 +816,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
}
 
do {
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   (vmhub == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req) :
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
 
/*
 * Issue a dummy read to wait for the ACK register to
@@ -815,8 +831,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  hub->eng_distance * eng);
 
for (j = 0; j < adev->usec_timeout; j++) {
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-   hub->eng_distance * eng);
+   tmp = (vmhub == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack 
+ hub->eng_distance * eng) :
+   RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_ack + hub->eng_distance * eng);
if (tmp & (1 << vmid))
break;
udelay(1);
@@ -827,13 +844,15 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
} while (inv_req);
 
/* TODO: It needs to continue working on debugging with semaphore for 
GFXHUB as well. */
-   if (use_semaphore)
+   if (use_semaphore) {
/*
 * add semaphore release after invalidation,
 * write with 0 means semaphore release
 */
-   WREG32_NO_KIQ(hub->vm

[PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-15 Thread Victor Skvortsov
Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 111 +++---
 1 file changed, 83 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d252b06efa43..bce6ab52cae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0
 
+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };
 
-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
static void *scratch_reg0;
static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
static void *spare_int;
static uint32_t grbm_cntl;
static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;
 
scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
 
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;
 
-   if (amdgpu_sriov_runtime(adev)) {
-   pr_err("shouldn't call rlcg write register during runtime\n");
-   return;
-   }
-
if (offset == grbm_cntl || offset == grbm_idx) {
if (offset  == grbm_cntl)
writel(v, scratch_reg2);
@@ -771,41 +777,89 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 
writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
} else {
-   uint32_t i = 0;
-   uint32_t retries = 5;
-
writel(v, scratch_reg0);
-   writel(offset | 0x8000, scratch_reg1);
+   writel(offset | flag, scratch_reg1);
writel(1, spare_int);
-   for (i = 0; i < retries; i++) {
-   u32 tmp;
 
+   for (i = 0; i < retries; i++) {
tmp = readl(scratch_reg1);
-   if (!(tmp & 0x8000))
+   if (!(tmp & flag))
break;
 
udelay(10);
}
-   if (i >= retries)
-   pr_err("timeout: rlcg program reg:0x%05x failed !\n", 
offset);
+
+   if (i >= retries) {
+   if (amdgpu_sriov_reg_indirect_gc(adev)) {
+   if (tmp & GFX9_RLCG_VFGATE_DISABLED)
+   pr_err("The vfgate is disabled, program 
reg:0x%05x failed!\n", offset);
+   else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)
+   pr_err("Wrong operation type, program 
reg:0x%05x failed!\n", offset);
+   else if (tmp & GFX9_RLCG_NOT_IN_RANGE)
+   pr_err("The register is not in range, 
program reg:0x%05x failed!\n", offset);
+   else
+ 

[PATCH 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

2021-12-15 Thread Victor Skvortsov
Add helper macros to change register access
from direct to indirect.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..473767e03676 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -51,6 +51,8 @@
 
 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)
 
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 AMDGPU_REGS_NO_KIQ, ip##_HWIP)
@@ -65,6 +67,9 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
 __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)
 
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
 value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
-- 
2.25.1



[PATCH 3/5] drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

2021-12-15 Thread Victor Skvortsov
Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ddfe7aff919d..1abf662a0e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, 
uint32_t pipe_id)
 
lock_srbm(adev, mec, pipe, 0, 0);
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
@@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
   lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
   upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+   WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
   (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
 
@@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device 
*adev,
uint32_t low, high;
 
acquire_queue(adev, pipe_id, queue_id);
-   act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
 
-   if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-  high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+   if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+  high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true;
}
release_queue(adev);
@@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void 
*mqd,
 
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
-   temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
@@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device 
*adev,
mutex_lock(&adev->grbm_idx_mutex);
 
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+   WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
 
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
@@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int 
queue_idx,
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
-   reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+   reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
 queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
@@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
 
gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0x);
-   queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
-  mmSPI_CSQ_WF_ACTIVE_STATUS));
+   queue_map = RREG32_SOC15(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_STATUS);
 
/*
 * Assumption: queue map encodes following schema: four
@@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct 
amdgpu_device *adev,
/*
 * Program TBA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
 lower_32_bits(tba_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
 upper_32_bits(tba_addr >> 8));
 
/*
 * Program TMA registers
 */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8));
-   WREG32(SOC15_REG_OFFSET(GC, 

[PATCH 0/5] *** GFX9 RLCG Interface modifications ***

2021-12-15 Thread Victor Skvortsov
This patchset introduces an expanded sriov RLCG interface.
This interface will be used by Aldebaran in sriov mode for
indirect GC register access during full access.

Victor Skvortsov (5):
  drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions
  drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov
  drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov
  drm/amdgpu: Initialize Aldebaran RLC function pointers
  drm/amdgpu: Modify indirect register access for gfx9 sriov

 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  27 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 114 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  45 +--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |   5 +
 6 files changed, 138 insertions(+), 57 deletions(-)

-- 
2.25.1



[PATCH] drm/amdgpu: SRIOV flr_work should use down_write

2021-12-09 Thread Victor Skvortsov
Host initiated VF FLR may fail if someone else
is already holding a read_lock. Change from
down_write_trylock to down_write to guarantee
the reset goes through.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index cd2719bc0139..e4365c97adaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -252,11 +252,12 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (!down_write_trylock(&adev->reset_sem))
+   if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
return;
 
+   down_write(&adev->reset_sem);
+
amdgpu_virt_fini_vf2pf_work_item(adev);
-   atomic_set(&adev->in_gpu_reset, 1);
 
xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 2bc93808469a..1cde70c72e54 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -281,11 +281,12 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (!down_write_trylock(&adev->reset_sem))
+   if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
return;
 
+   down_write(&adev->reset_sem);
+
amdgpu_virt_fini_vf2pf_work_item(adev);
-   atomic_set(&adev->in_gpu_reset, 1);
 
xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: Separate vf2pf work item init from virt data exchange

2021-12-09 Thread Victor Skvortsov
We want to be able to call virt data exchange conditionally
after gmc sw init to reserve bad pages as early as possible.
Since this is a conditional call, we will need to call
it again unconditionally later in the init sequence.

Refactor the data exchange function so it can be
called multiple times without re-initializing the work item.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 42 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  5 +--
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  2 +-
 5 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ce9bdef185c0..3992c4086d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2181,7 +2181,7 @@ static int amdgpu_device_ip_early_init(struct 
amdgpu_device *adev)
 
/*get pf2vf msg info at it's earliest time*/
if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_exchange_data(adev);
 
}
}
@@ -2345,8 +2345,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_init_data_exchange(adev);
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_exchange_data(adev);
+   amdgpu_virt_init_vf2pf_work_item(adev);
+   }
 
r = amdgpu_ib_pool_init(adev);
if (r) {
@@ -2949,7 +2951,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
int r;
 
if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_virt_fini_vf2pf_work_item(adev);
amdgpu_virt_request_full_gpu(adev, false);
}
 
@@ -3839,7 +3841,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 * */
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_request_full_gpu(adev, false);
-   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_virt_fini_vf2pf_work_item(adev);
}
 
/* disable all interrupts */
@@ -4317,7 +4319,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
if (r)
goto error;
 
-   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_exchange_data(adev);
+   amdgpu_virt_init_vf2pf_work_item(adev);
+
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
 
@@ -4495,7 +4499,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
 
if (amdgpu_sriov_vf(adev)) {
/* stop the data exchange thread */
-   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_virt_fini_vf2pf_work_item(adev);
}
 
/* block all schedulers and reset given job's ring */
@@ -4898,7 +4902,7 @@ static void amdgpu_device_recheck_guilty_jobs(
 retry:
/* do hw reset */
if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_fini_data_exchange(adev);
+   amdgpu_virt_fini_vf2pf_work_item(adev);
r = amdgpu_device_reset_sriov(adev, false);
if (r)
adev->asic_reset_res = r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3fc49823f527..b6e3d379a86a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -611,16 +611,7 @@ static void amdgpu_virt_update_vf2pf_work_item(struct 
work_struct *work)
schedule_delayed_work(&(adev->virt.vf2pf_work), 
adev->virt.vf2pf_update_interval_ms);
 }
 
-void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
-{
-   if (adev->virt.vf2pf_update_interval_ms != 0) {
-   DRM_INFO("clean up the vf2pf work item\n");
-   cancel_delayed_work_sync(&adev->virt.vf2pf_work);
-   adev->virt.vf2pf_update_interval_ms = 0;
-   }
-}
-
-void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 {
uint64_t bp_block_offset = 0;
uint32_t bp_block_size = 0;
@@ -628,11 +619,8 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
 
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
-   adev->virt.vf2pf_update_interval_ms = 0;
 
if (adev->mman.fw_vram_usage_va != N

[PATCH 2/2] drm/amdgpu: Reserve Bad pages early for SRIOV VF

2021-12-09 Thread Victor Skvortsov
Add a pf-vf exchange right after GMC sw init in
order to reserve bad pages as early as possible

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3992c4086d26..a146a55c9864 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2315,6 +2315,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
 
/* need to do gmc hw init early so we can allocate gpu mem */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_exchange_data(adev);
+
r = amdgpu_device_vram_scratch_init(adev);
if (r) {
DRM_ERROR("amdgpu_vram_scratch_init failed 
%d\n", r);
-- 
2.25.1