Under bare metal, nothing extra is needed to
access GPU registers through MMIO.
Under virtualization, GPU register access is
implemented through KIQ at run-time because of
world switches.

Therefore, under SR-IOV the user can only use
debugfs to r/w GPU registers when all three
conditions below are met.
- amdgpu_gpu_recovery=0
- TDR happened
- in_gpu_reset=0

Signed-off-by: Yintian Tao <yt...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
 4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..4f9780aabf5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, 
struct file *f,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        if (use_bank) {
                if ((sh_bank != 0xFFFFFFFF && sh_bank >= 
adev->gfx.config.max_sh_per_se) ||
                    (se_bank != 0xFFFFFFFF && se_bank >= 
adev->gfx.config.max_shader_engines)) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return -EINVAL;
                }
                mutex_lock(&adev->grbm_idx_mutex);
@@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct 
file *f,
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file 
*f, char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file 
*f, char __user *buf,
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file 
*f, char __user *buf,
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file 
*f, const char __user
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file 
*f, const char __user
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file 
*f, const char __user
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file 
*f, char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file 
*f, char __user *buf,
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file 
*f, char __user *buf,
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file 
*f, const char __user
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file 
*f, const char __user
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file 
*f, const char __user
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file 
*f, char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, 
char __user *buf,
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, 
char __user *buf,
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file 
*f, const char __user *
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        while (size) {
                uint32_t value;
 
@@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file 
*f, const char __user *
                if (r) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
                }
 
@@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file 
*f, const char __user *
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, 
char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
-       if (r)
+       if (r) {
+               amdgpu_virt_disable_access_debugfs(adev);
                return r;
+       }
 
-       if (size > valuesize)
+       if (size > valuesize) {
+               amdgpu_virt_disable_access_debugfs(adev);
                return -EINVAL;
+       }
 
        outsize = 0;
        x = 0;
@@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, 
char __user *buf,
                }
        }
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return !r ? outsize : r;
 }
 
@@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, 
char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        /* switch to the specific se/sh/cu */
        mutex_lock(&adev->grbm_idx_mutex);
        amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -734,16 +798,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, 
char __user *buf,
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
-       if (!x)
+       if (!x) {
+               amdgpu_virt_disable_access_debugfs(adev);
                return -EINVAL;
+       }
 
        while (size && (offset < x * 4)) {
                uint32_t value;
 
                value = data[offset >> 2];
                r = put_user(value, (uint32_t *)buf);
-               if (r)
+               if (r) {
+                       amdgpu_virt_disable_access_debugfs(adev);
                        return r;
+               }
 
                result += 4;
                buf += 4;
@@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, 
char __user *buf,
                size -= 4;
        }
 
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
@@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, 
char __user *buf,
        if (r < 0)
                return r;
 
+       if (!amdgpu_virt_can_access_debugfs(adev))
+               return -EINVAL;
+       else
+               amdgpu_virt_enable_access_debugfs(adev);
+
        /* switch to the specific se/sh/cu */
        mutex_lock(&adev->grbm_idx_mutex);
        amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -840,6 +914,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char 
__user *buf,
 
 err:
        kfree(data);
+       amdgpu_virt_disable_access_debugfs(adev);
        return result;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2b99f5952375..993b75dde5d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
        struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
        struct amdgpu_job *job = to_amdgpu_job(s_job);
        struct amdgpu_task_info ti;
+       struct amdgpu_device *adev = ring->adev;
 
        memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
@@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
        DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
                  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
-       if (amdgpu_device_should_recover_gpu(ring->adev))
+       if (amdgpu_device_should_recover_gpu(ring->adev)) {
                amdgpu_device_gpu_recover(ring->adev, job);
-       else
+       } else {
                drm_sched_suspend_timeout(&ring->sched);
+               adev->virt.tdr_debug = true;
+       }
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4d06c79065bf..d0dfe99ebc75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device 
*adev)
                        adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
        }
 }
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
+{ /* Report whether debugfs register access is currently permitted. */
+       if (!amdgpu_sriov_vf(adev)) /* amdgpu_sriov_vf() false: no restriction */
+               return true;
+
+       if (amdgpu_sriov_is_debug(adev)) /* tdr_debug set and not in_gpu_reset */
+               return true;
+
+       return false; /* otherwise the debugfs callers bail out with -EINVAL */
+}
+
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{ /* Clear the SR-IOV runtime cap before a debugfs register access. */
+       if (amdgpu_sriov_vf(adev)) /* no-op when amdgpu_sriov_vf() is false */
+               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; /* presumably makes reg access use MMIO, not KIQ - TODO confirm */
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{ /* Set the SR-IOV runtime cap again once a debugfs access is finished. */
+       if (amdgpu_sriov_vf(adev)) /* no-op when amdgpu_sriov_vf() is false */
+               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; /* NOTE(review): always sets the bit; prior value is not saved/restored */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f6ae3c656304..a01742b7bf12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -265,6 +265,7 @@ struct amdgpu_virt {
        uint32_t gim_feature;
        uint32_t reg_access_mode;
        int req_init_data_ver;
+       bool tdr_debug;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
 
 #define amdgpu_sriov_is_pp_one_vf(adev) \
        ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+       ((!(adev)->in_gpu_reset) && (adev)->virt.tdr_debug) /* TDR seen, no reset running */
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned 
long obj_size,
                                        unsigned int chksum);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
 #endif
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to