RE: [PATCH] drm/amd/pm: bug fix for baco reset

2021-03-17 Thread Lazar, Lijo
[AMD Public Use]


-Original Message-
From: amd-gfx  On Behalf Of Kenneth Feng
Sent: Thursday, March 18, 2021 9:37 AM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: bug fix for baco reset

On vega20, rocm-smi gets the wrong gfx voltage after baco reset.
This can be reproduced as below.
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 737
:~$ rocm-smi -d0 --gpureset
GPU[0] : GPU reset was successful
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 1550

Root cause: telemetry is disabled in the asic_init after baco exit.
This fix targets to re-enable telemetry then all the power and voltage info can 
be fetched correctly, mp1 firmware also depends on this setting for dpm 
arbitration.

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.c| 17 +
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.h|  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 2a28c9df15a0..bb58097a925c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -28,9 +28,16 @@
 #include "vega20_ppsmc.h"
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
+#include "smuio/smuio_9_0_offset.h"
+#include "smuio/smuio_9_0_sh_mask.h"
 
 #include "amdgpu_ras.h"
 
+#define mmSMUSVI0_TFN 0x2
+#define SMUSVI0_TFN___PLANE0_MASK 0x1
+#define SMUSVI0_TFN___PLANE1_MASK 0x2
+#define mmSMUSVI0_TFN_BASE_IDX 0
+
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =  {
{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0}, @@ 
-120,3 +127,13 @@ int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr 
*hwmgr)
 
return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI, 
NULL);  }
+
+void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr) 
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+   uint32_t data = RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN);
+

< > This most likely needs a !VF check. Register may not be accessible for VF 
cases.

Thanks,
Lijo

+   data &= (~SMUSVI0_TFN___PLANE0_MASK);
+   data |= SMUSVI0_TFN___PLANE1_MASK;
+   WREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN, data); }
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..9ca39569ba0e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -29,5 +29,5 @@ extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, 
bool *cap);  extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum 
BACO_STATE *state);  extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, 
enum BACO_STATE state);  extern int 
vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
-
+extern void vega20_baco_override_telemetry_parameters(struct pp_hwmgr 
+*hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 213c9c6b4462..12830a8dd923 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -503,6 +503,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
ret = vega20_baco_apply_vdci_flush_workaround(hwmgr);
if (ret)
pr_err("Failed to apply vega20 baco workaround!\n");
+   vega20_baco_override_telemetry_parameters(hwmgr);
}
 
return ret;
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Clijo.lazar%40amd.com%7C7fee5a2166be4f31286408d8e9c33f55%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637516372082911361%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=yzMgTiFkTVDmMQ%2FxNH4Zztx3NQRXPSCKw7pflm3VUZY%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 09/13] drm/amdgpu/swsmu: skip gfx cgpg on s0ix suspend

2021-03-17 Thread Alex Deucher
The SMU expects CGPG to be enabled when entering S0ix.
with this we can re-enable SMU suspend.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +--
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 99444bd1f2e9..a84d93ced407 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2727,8 +2727,7 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
 
/* XXX fix these remaining cases */
if (adev->in_s0ix &&
-   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC 
|| /* breaks suspend */
-adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
|| /* breaks resume */
+   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
|| /* breaks resume */
 adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_GFX))  /* breaks suspend */
continue;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index cfcac110ed84..143e3783251e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1474,7 +1474,8 @@ static int smu_suspend(void *handle)
 
smu->watermarks_bitmap &= ~(WATERMARKS_LOADED);
 
-   if (smu->is_apu)
+   /* skip CGPG when in S0ix */
+   if (smu->is_apu && !adev->in_s0ix)
smu_set_gfx_cgpg(>smu, false);
 
return 0;
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 11/13] drm/amdgpu: skip CG/PG for gfx during S0ix

2021-03-17 Thread Alex Deucher
Not needed as the device is in gfxoff state so the CG/PG state
is handled just like it would be for gfxoff during runtime gfxoff.

This should also prevent delays on resume.

Signed-off-by: Alex Deucher 
Cc: Pratik Vishwakarma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a6f4b52ec796..de70a9917db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2344,6 +2344,10 @@ static int amdgpu_device_set_cg_state(struct 
amdgpu_device *adev,
i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 
1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
+   /* skip CG for GFX on S0ix */
+   if (adev->in_s0ix &&
+   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+   continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2375,6 +2379,10 @@ static int amdgpu_device_set_pg_state(struct 
amdgpu_device *adev, enum amd_power
i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 
1;
if (!adev->ip_blocks[i].status.late_initialized)
continue;
+   /* skip PG for GFX on S0ix */
+   if (adev->in_s0ix &&
+   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+   continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 12/13] drm/amdgpu: drop S0ix checks around CG/PG in suspend

2021-03-17 Thread Alex Deucher
We handle it properly within the CG/PG functions directly
now.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index de70a9917db5..965b4f18ddc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2664,10 +2664,8 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
 {
int i, r;
 
-   if (!adev->in_s0ix || amdgpu_in_reset(adev)) {
-   amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
-   amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-   }
+   amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+   amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 04/13] drm/amdgpu: rework S3/S4/S0ix state handling

2021-03-17 Thread Alex Deucher
Set flags at the top level pmops callbacks to track
state.  This cleans up the current set of flags and
properly handles S4 on S0ix capable systems.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 10 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 30 +-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  |  2 +-
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 569c85419f76..c03463d614e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1030,13 +1030,9 @@ struct amdgpu_device {
 
/* s3/s4 mask */
boolin_suspend;
-   boolin_hibernate;
-
-   /*
-* The combination flag in_poweroff_reboot_com used to identify the 
poweroff
-* and reboot opt in the s0i3 system-wide suspend.
-*/
-   boolin_poweroff_reboot_com;
+   boolin_s3;
+   boolin_s4;
+   boolin_s0ix;
 
atomic_tin_gpu_reset;
enum pp_mp1_state   mp1_state;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a984ba6634fd..1859bc06ed70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2656,8 +2656,7 @@ static int amdgpu_device_ip_suspend_phase1(struct 
amdgpu_device *adev)
 {
int i, r;
 
-   if (adev->in_poweroff_reboot_com ||
-   !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
+   if (!adev->in_s0ix || amdgpu_in_reset(adev)) {
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}
@@ -3737,8 +3736,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
amdgpu_fence_driver_suspend(adev);
 
-   if (adev->in_poweroff_reboot_com ||
-   !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
+   if (!adev->in_s0ix || amdgpu_in_reset(adev))
r = amdgpu_device_ip_suspend_phase2(adev);
else
amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
@@ -3772,7 +3770,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
 
-   if (amdgpu_acpi_is_s0ix_supported(adev))
+   if (adev->in_s0ix)
amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
 
/* post card */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index cfa73ff292d5..5438a4d3d517 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1334,9 +1334,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
 */
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
-   adev->in_poweroff_reboot_com = true;
amdgpu_device_ip_suspend(adev);
-   adev->in_poweroff_reboot_com = false;
adev->mp1_state = PP_MP1_STATE_NONE;
 }
 
@@ -1425,15 +1423,28 @@ static void amdgpu_pmops_complete(struct device *dev)
 static int amdgpu_pmops_suspend(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;
 
-   return amdgpu_device_suspend(drm_dev, true);
+   if (amdgpu_acpi_is_s0ix_supported(adev))
+   adev->in_s0ix = true;
+   adev->in_s3 = true;
+   r = amdgpu_device_suspend(drm_dev, true);
+   adev->in_s3 = false;
+
+   return r;
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;
 
-   return amdgpu_device_resume(drm_dev, true);
+   r = amdgpu_device_resume(drm_dev, true);
+   if (amdgpu_acpi_is_s0ix_supported(adev))
+   adev->in_s0ix = false;
+   return r;
 }
 
 static int amdgpu_pmops_freeze(struct device *dev)
@@ -1442,9 +1453,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
 
-   adev->in_hibernate = true;
+   adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
-   adev->in_hibernate = false;
+   adev->in_s4 = false;
if (r)
return r;
return amdgpu_asic_reset(adev);
@@ -1460,13 +1471,8 @@ static int amdgpu_pmops_thaw(struct device *dev)
 static int amdgpu_pmops_poweroff(struct device *dev)
 {
struct drm_device *drm_dev = 

[PATCH 10/13] drm/amdgpu: update comments about s0ix suspend/resume

2021-03-17 Thread Alex Deucher
Provide and explanation as to why we skip GFX and PSP for
S0ix.  GFX goes into gfxoff, same as runtime, so no need
to tear down and re-init.  PSP is part of the always on
state, so no need to touch it.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a84d93ced407..a6f4b52ec796 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2725,10 +2725,14 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
continue;
}
 
-   /* XXX fix these remaining cases */
+   /* skip suspend of gfx and psp for S0ix
+* gfx is in gfxoff state, so on resume it will exit gfxoff just
+* like at runtime. PSP is also part of the always on hardware
+* so no need to suspend it.
+*/
if (adev->in_s0ix &&
-   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
|| /* breaks resume */
-adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_GFX))  /* breaks suspend */
+   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
||
+adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
continue;
 
/* XXX handle errors */
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 13/13] drm/amdgpu: skip kfd suspend/resume for S0ix

2021-03-17 Thread Alex Deucher
GFX is in gfxoff mode during s0ix so we shouldn't need to
actually tear anything down and restore it.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 965b4f18ddc7..319ad9326a71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3705,7 +3705,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
r = amdgpu_device_ip_suspend_phase1(adev);
 
-   amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+   if (!adev->in_s0ix)
+   amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
@@ -3765,9 +3766,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
queue_delayed_work(system_wq, >delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
-   r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
-   if (r)
-   return r;
+   if (!adev->in_s0ix) {
+   r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+   if (r)
+   return r;
+   }
 
/* Make sure IB tests flushed */
flush_delayed_work(>delayed_init_work);
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 02/13] drm/amdgpu: enable DPM_FLAG_MAY_SKIP_RESUME and DPM_FLAG_SMART_SUSPEND flags (v2)

2021-03-17 Thread Alex Deucher
Once the device has runtime suspended, we don't need to power it
back up again for system suspend.  Likewise for resume, we don't
to power up the device again on resume only to power it back off
again via runtime pm because it's still idle.

v2: add DPM_FLAG_SMART_PREPARE as well

Acked-by: Rajneesh Bhardwaj  (v1)
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f269267a96d3..8e6ef4d8b7ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -205,6 +205,13 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, 
unsigned long flags)
if (amdgpu_device_supports_atpx(dev) &&
!amdgpu_is_atpx_hybrid())
dev_pm_set_driver_flags(dev->dev, 
DPM_FLAG_NO_DIRECT_COMPLETE);
+   /* we want direct complete for BOCO */
+   if ((amdgpu_device_supports_atpx(dev) &&
+   amdgpu_is_atpx_hybrid()) ||
+   amdgpu_device_supports_boco(dev))
+   dev_pm_set_driver_flags(dev->dev, 
DPM_FLAG_SMART_PREPARE |
+   DPM_FLAG_SMART_SUSPEND |
+   DPM_FLAG_MAY_SKIP_RESUME);
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
pm_runtime_allow(dev->dev);
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/13] drm/amdgpu: clean up non-DC suspend/resume handling

2021-03-17 Thread Alex Deucher
Move the non-DC specific code into the DCE IP blocks similar
to how we handle DC.  This cleans up the common suspend
and resume pathes.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 86 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 88 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.h |  3 +
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c  |  9 ++-
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c  |  9 ++-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c   |  8 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c   |  9 ++-
 drivers/gpu/drm/amd/amdgpu/dce_virtual.c| 15 +++-
 8 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1859bc06ed70..80263bb8a631 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3667,14 +3667,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
  */
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
-   struct amdgpu_device *adev;
-   struct drm_crtc *crtc;
-   struct drm_connector *connector;
-   struct drm_connector_list_iter iter;
+   struct amdgpu_device *adev = drm_to_adev(dev);
int r;
 
-   adev = drm_to_adev(dev);
-
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
 
@@ -3686,45 +3681,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
cancel_delayed_work_sync(>delayed_init_work);
 
-   if (!amdgpu_device_has_dc_support(adev)) {
-   /* turn off display hw */
-   drm_modeset_lock_all(dev);
-   drm_connector_list_iter_begin(dev, );
-   drm_for_each_connector_iter(connector, )
-   drm_helper_connector_dpms(connector,
- DRM_MODE_DPMS_OFF);
-   drm_connector_list_iter_end();
-   drm_modeset_unlock_all(dev);
-   /* unpin the front buffers and cursors */
-   list_for_each_entry(crtc, >mode_config.crtc_list, head) {
-   struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-   struct drm_framebuffer *fb = crtc->primary->fb;
-   struct amdgpu_bo *robj;
-
-   if (amdgpu_crtc->cursor_bo && 
!adev->enable_virtual_display) {
-   struct amdgpu_bo *aobj = 
gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-   r = amdgpu_bo_reserve(aobj, true);
-   if (r == 0) {
-   amdgpu_bo_unpin(aobj);
-   amdgpu_bo_unreserve(aobj);
-   }
-   }
-
-   if (fb == NULL || fb->obj[0] == NULL) {
-   continue;
-   }
-   robj = gem_to_amdgpu_bo(fb->obj[0]);
-   /* don't unpin kernel fb objects */
-   if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-   r = amdgpu_bo_reserve(robj, true);
-   if (r == 0) {
-   amdgpu_bo_unpin(robj);
-   amdgpu_bo_unreserve(robj);
-   }
-   }
-   }
-   }
-
amdgpu_ras_suspend(adev);
 
r = amdgpu_device_ip_suspend_phase1(adev);
@@ -3761,10 +3717,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
  */
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 {
-   struct drm_connector *connector;
-   struct drm_connector_list_iter iter;
struct amdgpu_device *adev = drm_to_adev(dev);
-   struct drm_crtc *crtc;
int r = 0;
 
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
@@ -3795,24 +3748,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
queue_delayed_work(system_wq, >delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
-   if (!amdgpu_device_has_dc_support(adev)) {
-   /* pin cursors */
-   list_for_each_entry(crtc, >mode_config.crtc_list, head) {
-   struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-   if (amdgpu_crtc->cursor_bo && 
!adev->enable_virtual_display) {
-   struct amdgpu_bo *aobj = 
gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-   r = amdgpu_bo_reserve(aobj, true);
-   if (r == 0) {
-   r = amdgpu_bo_pin(aobj, 
AMDGPU_GEM_DOMAIN_VRAM);
-   if (r != 0)
-   dev_err(adev->dev, "Failed to 

[PATCH 07/13] drm/amdgpu: move s0ix check into amdgpu_device_ip_suspend_phase2 (v3)

2021-03-17 Thread Alex Deucher
No functional change.

v2: use correct dev
v3: rework

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 80263bb8a631..2f34628e15bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2699,6 +2699,11 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
 {
int i, r;
 
+   if (adev->in_s0ix) {
+   amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+   return 0;
+   }
+
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -3692,10 +3697,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
amdgpu_fence_driver_suspend(adev);
 
-   if (!adev->in_s0ix || amdgpu_in_reset(adev))
-   r = amdgpu_device_ip_suspend_phase2(adev);
-   else
-   amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+   r = amdgpu_device_ip_suspend_phase2(adev);
/* evict remaining vram memory
 * This second call to evict vram is to evict the gart page table
 * using the CPU.
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 01/13] drm/amdgpu: add a dev_pm_ops prepare callback (v2)

2021-03-17 Thread Alex Deucher
as per:
https://www.kernel.org/doc/html/latest/driver-api/pm/devices.html

The prepare callback is required to support the DPM_FLAG_SMART_SUSPEND
driver flag.  This allows runtime pm to auto complete when the
system goes into suspend avoiding a wake up on suspend and on resume.
Apply this for hybrid gfx and BOCO systems where d3cold is
provided by the ACPI platform.

v2: check if device is runtime suspended in prepare.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 49484ea5366d..60fba0454368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -1402,6 +1403,27 @@ static void amdgpu_drv_delayed_reset_work_handler(struct 
work_struct *work)
return;
 }
 
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+   struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+   /* Return a positive number here so
+* DPM_FLAG_SMART_SUSPEND works properly
+*/
+   if ((amdgpu_device_supports_atpx(drm_dev) &&
+   amdgpu_is_atpx_hybrid()) ||
+   amdgpu_device_supports_boco(drm_dev))
+   return pm_runtime_suspended(dev) &&
+   pm_suspend_via_firmware();
+
+   return 0;
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+   /* nothing to do */
+}
+
 static int amdgpu_pmops_suspend(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drvdata(dev);
@@ -1620,6 +1642,8 @@ long amdgpu_drm_ioctl(struct file *filp,
 }
 
 static const struct dev_pm_ops amdgpu_pm_ops = {
+   .prepare = amdgpu_pmops_prepare,
+   .complete = amdgpu_pmops_complete,
.suspend = amdgpu_pmops_suspend,
.resume = amdgpu_pmops_resume,
.freeze = amdgpu_pmops_freeze,
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 03/13] drm/amdgpu: disentangle HG systems from vgaswitcheroo

2021-03-17 Thread Alex Deucher
There's no need to keep vgaswitcheroo around for HG
systems.  They don't use muxes and their power control
is handled via ACPI.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 34 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  9 ++---
 4 files changed, 35 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index efe6b5ca5185..569c85419f76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1275,8 +1275,9 @@ void amdgpu_device_program_register_sequence(struct 
amdgpu_device *adev,
 const u32 *registers,
 const u32 array_size);
 
-bool amdgpu_device_supports_atpx(struct drm_device *dev);
 int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct drm_device *dev);
+bool amdgpu_device_supports_px(struct drm_device *dev);
 bool amdgpu_device_supports_boco(struct drm_device *dev);
 bool amdgpu_device_supports_baco(struct drm_device *dev);
 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0f82c5d21237..a984ba6634fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -212,18 +212,18 @@ static DEVICE_ATTR(serial_number, S_IRUGO,
amdgpu_device_get_serial_number, NULL);
 
 /**
- * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
+ * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
  *
  * @dev: drm_device pointer
  *
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ATPX power control,
  * otherwise return false.
  */
-bool amdgpu_device_supports_atpx(struct drm_device *dev)
+bool amdgpu_device_supports_px(struct drm_device *dev)
 {
struct amdgpu_device *adev = drm_to_adev(dev);
 
-   if (adev->flags & AMD_IS_PX)
+   if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
return true;
return false;
 }
@@ -233,14 +233,15 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev)
  *
  * @dev: drm_device pointer
  *
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ACPI power control,
  * otherwise return false.
  */
 bool amdgpu_device_supports_boco(struct drm_device *dev)
 {
struct amdgpu_device *adev = drm_to_adev(dev);
 
-   if (adev->has_pr3)
+   if (adev->has_pr3 ||
+   ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
return true;
return false;
 }
@@ -1391,7 +1392,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev 
*pdev,
struct drm_device *dev = pci_get_drvdata(pdev);
int r;
 
-   if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
+   if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
return;
 
if (state == VGA_SWITCHEROO_ON) {
@@ -3197,7 +3198,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
struct drm_device *ddev = adev_to_drm(adev);
struct pci_dev *pdev = adev->pdev;
int r, i;
-   bool atpx = false;
+   bool px = false;
u32 max_MBps;
 
adev->shutdown = false;
@@ -3359,16 +3360,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, adev, NULL, 
amdgpu_device_vga_set_decode);
 
-   if (amdgpu_device_supports_atpx(ddev))
-   atpx = true;
-   if (amdgpu_has_atpx() &&
-   (amdgpu_is_atpx_hybrid() ||
-amdgpu_has_atpx_dgpu_power_cntl()) &&
-   !pci_is_thunderbolt_attached(adev->pdev))
+   if (amdgpu_device_supports_px(ddev)) {
+   px = true;
vga_switcheroo_register_client(adev->pdev,
-  _switcheroo_ops, atpx);
-   if (atpx)
+  _switcheroo_ops, px);
vga_switcheroo_init_domain_pm_ops(adev->dev, 
>vga_pm_domain);
+   }
 
if (amdgpu_emu_mode == 1) {
/* post the asic on emulation mode */
@@ -3575,7 +3572,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 failed:
amdgpu_vf_error_trans_all(adev);
-   if (atpx)
+   if (px)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 failed_unmap:
@@ -3635,13 +3632,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
kfree(adev->bios);
adev->bios = NULL;
-   if (amdgpu_has_atpx() 

[PATCH 08/13] drm/amdgpu: re-enable suspend phase 2 for S0ix

2021-03-17 Thread Alex Deucher
This really needs to be done to properly tear down
the device.  SMC, PSP, and GFX are still problematic,
need to dig deeper into what aspect of them that is
problematic.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f34628e15bd..99444bd1f2e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2699,10 +2699,8 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
 {
int i, r;
 
-   if (adev->in_s0ix) {
+   if (adev->in_s0ix)
amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
-   return 0;
-   }
 
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -2726,6 +2724,14 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
continue;
}
+
+   /* XXX fix these remaining cases */
+   if (adev->in_s0ix &&
+   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC 
|| /* breaks suspend */
+adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
|| /* breaks resume */
+adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_GFX))  /* breaks suspend */
+   continue;
+
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 05/13] drm/amdgpu: don't evict vram on APUs for suspend to ram (v4)

2021-03-17 Thread Alex Deucher
Vram is system memory, so no need to evict.

v2: use PM_EVENT messages
v3: use correct dev
v4: use driver flags

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index de52a99916f8..a26e24d86929 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1028,13 +1028,10 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 {
struct ttm_resource_manager *man;
 
-   /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
-#ifndef CONFIG_HIBERNATION
-   if (adev->flags & AMD_IS_APU) {
-   /* Useless to evict on IGP chips */
+   if (adev->in_s3 && (adev->flags & AMD_IS_APU)) {
+   /* No need to evict vram on APUs for suspend to ram */
return 0;
}
-#endif
 
man = ttm_manager_type(>mman.bdev, TTM_PL_VRAM);
return ttm_resource_manager_evict_all(>mman.bdev, man);
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[pull] amdgpu drm-fixes-5.12

2021-03-17 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.12.

The following changes since commit 4042160c2e5433e0759782c402292a90b5bf458d:

  drm/nouveau: fix dma syncing for loops (v2) (2021-03-12 11:21:47 +1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.12-2021-03-18

for you to fetch changes up to beb6b2f97e0a02164c7f0df6e08c49219cfc2b80:

  drm/amd/display: Remove MPC gamut remap logic for DCN30 (2021-03-18 00:05:22 
-0400)


amdgpu:
- DCN 3.0 gamma fixes
- DCN 2.1 corrupt screen fix


Calvin Hou (1):
  drm/amd/display: Correct algorithm for reversed gamma

Dillon Varone (1):
  drm/amd/display: Remove MPC gamut remap logic for DCN30

Sung Lee (1):
  drm/amd/display: Copy over soc values before bounding box creation

 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 34 ++
 .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c  |  5 
 .../gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c | 26 -
 3 files changed, 25 insertions(+), 40 deletions(-)
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/pm: bug fix for baco reset

2021-03-17 Thread Quan, Evan
[AMD Public Use]

Reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx  On Behalf Of Kenneth Feng
Sent: Thursday, March 18, 2021 12:07 PM
To: amd-gfx@lists.freedesktop.org
Cc: Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: bug fix for baco reset

On vega20, rocm-smi gets the wrong gfx voltage after baco reset.
This can be reproduced as below.
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 737
:~$ rocm-smi -d0 --gpureset
GPU[0] : GPU reset was successful
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 1550

Root cause: telemetry is disabled in the asic_init after baco exit.
This fix targets to re-enable telemetry then all the power and voltage
info can be fetched correctly, mp1 firmware also depends on this setting
for dpm arbitration.

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.c| 17 +
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.h|  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 2a28c9df15a0..bb58097a925c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -28,9 +28,16 @@
 #include "vega20_ppsmc.h"
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
+#include "smuio/smuio_9_0_offset.h"
+#include "smuio/smuio_9_0_sh_mask.h"
 
 #include "amdgpu_ras.h"
 
+#define mmSMUSVI0_TFN 0x2
+#define SMUSVI0_TFN___PLANE0_MASK 0x1
+#define SMUSVI0_TFN___PLANE1_MASK 0x2
+#define mmSMUSVI0_TFN_BASE_IDX 0
+
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =
 {
{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0},
@@ -120,3 +127,13 @@ int vega20_baco_apply_vdci_flush_workaround(struct 
pp_hwmgr *hwmgr)
 
return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI, 
NULL);
 }
+
+void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+   uint32_t data = RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN);
+
+   data &= (~SMUSVI0_TFN___PLANE0_MASK);
+   data |= SMUSVI0_TFN___PLANE1_MASK;
+   WREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN, data);
+}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..9ca39569ba0e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -29,5 +29,5 @@ extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, 
bool *cap);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE 
*state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE 
state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
-
+extern void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 213c9c6b4462..12830a8dd923 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -503,6 +503,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
ret = vega20_baco_apply_vdci_flush_workaround(hwmgr);
if (ret)
pr_err("Failed to apply vega20 baco workaround!\n");
+   vega20_baco_override_telemetry_parameters(hwmgr);
}
 
return ret;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cevan.quan%40amd.com%7C7fee5a2166be4f31286408d8e9c33f55%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637516372084403923%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=KEFE61ILud7i%2F28piougWFNBBSYz%2B5mi3dBfHSghllw%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: bug fix for baco reset

2021-03-17 Thread Kenneth Feng
On vega20, rocm-smi gets the wrong gfx voltage after baco reset.
This can be reproduced as below.
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 737
:~$ rocm-smi -d0 --gpureset
GPU[0] : GPU reset was successful
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 1550

Root cause: telemetry is disabled in the asic_init after baco exit.
This fix targets to re-enable telemetry then all the power and voltage
info can be fetched correctly, mp1 firmware also depends on this setting
for dpm arbitration.

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.c| 17 +
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.h|  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 2a28c9df15a0..bb58097a925c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -28,9 +28,16 @@
 #include "vega20_ppsmc.h"
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
+#include "smuio/smuio_9_0_offset.h"
+#include "smuio/smuio_9_0_sh_mask.h"
 
 #include "amdgpu_ras.h"
 
+#define mmSMUSVI0_TFN 0x2
+#define SMUSVI0_TFN___PLANE0_MASK 0x1
+#define SMUSVI0_TFN___PLANE1_MASK 0x2
+#define mmSMUSVI0_TFN_BASE_IDX 0
+
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =
 {
{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0},
@@ -120,3 +127,13 @@ int vega20_baco_apply_vdci_flush_workaround(struct 
pp_hwmgr *hwmgr)
 
return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI, 
NULL);
 }
+
+void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+   uint32_t data = RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN);
+
+   data &= (~SMUSVI0_TFN___PLANE0_MASK);
+   data |= SMUSVI0_TFN___PLANE1_MASK;
+   WREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN, data);
+}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..9ca39569ba0e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -29,5 +29,5 @@ extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, 
bool *cap);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE 
*state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE 
state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
-
+extern void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 213c9c6b4462..12830a8dd923 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -503,6 +503,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
ret = vega20_baco_apply_vdci_flush_workaround(hwmgr);
if (ret)
pr_err("Failed to apply vega20 baco workaround!\n");
+   vega20_baco_override_telemetry_parameters(hwmgr);
}
 
return ret;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Remove unnecessary conversion to bool

2021-03-17 Thread Alex Deucher
On Wed, Mar 17, 2021 at 10:37 PM Jiapeng Chong
 wrote:
>
> Fix the following coccicheck warnings:
>
> ./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c:220:65-70:
> WARNING: conversion to bool not needed here.
>
> Reported-by: Abaci Robot 
> Signed-off-by: Jiapeng Chong 

Applied.  Thanks.  In general, you can just roll up most these bool
conversion patches into larger patches; no need to fix them all one at
a time.

Alex

> ---
>  drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
> index 8593145..3fe9e41 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
> @@ -217,7 +217,7 @@ static bool dwb3_program_ogam_lut(
> else
> next_mode = LUT_RAM_A;
>
> -   dwb3_configure_ogam_lut(dwbc30, next_mode == LUT_RAM_A ? true : 
> false);
> +   dwb3_configure_ogam_lut(dwbc30, next_mode == LUT_RAM_A);
>
> if (next_mode == LUT_RAM_A)
> dwb3_program_ogam_luta_settings(dwbc30, params);
> --
> 1.8.3.1
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Remove unnecessary conversion to bool

2021-03-17 Thread Alex Deucher
On Tue, Mar 16, 2021 at 4:09 AM Jiapeng Chong
 wrote:
>
> Fix the following coccicheck warnings:
>
> ./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c:721:65-70: WARNING:
> conversion to bool not needed here.
>
> ./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c:1139:67-72: WARNING:
> conversion to bool not needed here.
>
> Reported-by: Abaci Robot 
> Signed-off-by: Jiapeng Chong 

Applied.  Thanks!

Alex

> ---
>  drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
> index 6e864b1..434d3c4 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
> @@ -718,7 +718,7 @@ bool dpp3_program_blnd_lut(
> next_mode = LUT_RAM_B;
>
> dpp3_power_on_blnd_lut(dpp_base, true);
> -   dpp3_configure_blnd_lut(dpp_base, next_mode == LUT_RAM_A ? 
> true:false);
> +   dpp3_configure_blnd_lut(dpp_base, next_mode == LUT_RAM_A);
>
> if (next_mode == LUT_RAM_A)
> dpp3_program_blnd_luta_settings(dpp_base, params);
> @@ -1136,7 +1136,7 @@ bool dpp3_program_shaper(
> else
> next_mode = LUT_RAM_A;
>
> -   dpp3_configure_shaper_lut(dpp_base, next_mode == LUT_RAM_A ? 
> true:false);
> +   dpp3_configure_shaper_lut(dpp_base, next_mode == LUT_RAM_A);
>
> if (next_mode == LUT_RAM_A)
> dpp3_program_shaper_luta_settings(dpp_base, params);
> --
> 1.8.3.1
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Remove unnecessary conversion to bool

2021-03-17 Thread Jiapeng Chong
Fix the following coccicheck warnings:

./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c:220:65-70:
WARNING: conversion to bool not needed here.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
index 8593145..3fe9e41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
@@ -217,7 +217,7 @@ static bool dwb3_program_ogam_lut(
else
next_mode = LUT_RAM_A;
 
-   dwb3_configure_ogam_lut(dwbc30, next_mode == LUT_RAM_A ? true : false);
+   dwb3_configure_ogam_lut(dwbc30, next_mode == LUT_RAM_A);
 
if (next_mode == LUT_RAM_A)
dwb3_program_ogam_luta_settings(dwbc30, params);
-- 
1.8.3.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RESEND 00/53] Rid GPU from W=1 warnings

2021-03-17 Thread Daniel Vetter
On Wed, Mar 17, 2021 at 9:17 AM Lee Jones  wrote:
>
> On Thu, 11 Mar 2021, Lee Jones wrote:
>
> > On Thu, 11 Mar 2021, Daniel Vetter wrote:
> >
> > > On Mon, Mar 08, 2021 at 09:19:32AM +, Lee Jones wrote:
> > > > On Fri, 05 Mar 2021, Roland Scheidegger wrote:
> > > >
> > > > > The vmwgfx ones look all good to me, so for
> > > > > 23-53: Reviewed-by: Roland Scheidegger 
> > > > > That said, they were already signed off by Zack, so not sure what
> > > > > happened here.
> > > >
> > > > Yes, they were accepted at one point, then dropped without a reason.
> > > >
> > > > Since I rebased onto the latest -next, I had to pluck them back out of
> > > > a previous one.
> > >
> > > They should show up in linux-next again. We merge patches for next merge
> > > window even during the current merge window, but need to make sure they
> > > don't pollute linux-next. Occasionally the cut off is wrong so patches
> > > show up, and then get pulled again.
> > >
> > > Unfortunately especially the 5.12 merge cycle was very wobbly due to some
> > > confusion here. But your patches should all be in linux-next again (they
> > > are queued up for 5.13 in drm-misc-next, I checked that).
> > >
> > > Sorry for the confusion here.
> >
> > Oh, I see.  Well so long as they don't get dropped, I'll be happy.
> >
> > Thanks for the explanation Daniel
>
> After rebasing today, all of my GPU patches have remained.  Would
> someone be kind enough to check that everything is still in order
> please?

It's still broken somehow. I've kiced Maxime and Maarten again,
they're also on this thread.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Add irq register entry for dmub

2021-03-17 Thread Harry Wentland

On 2021-03-17 12:21 p.m., Rodrigo Siqueira wrote:

DCN2.1 and DCN3.0 are missing some macros that register irq entries
which cause compilation errors. This commit introduces those macros and
fix the compilation error.

Cc: Wayne Lin 
Cc: Solomon Chiu 
Fixes: 53e9c0f651421136 ("drm/amd/display: Support vertical interrupt 0 for all dcn 
ASIC")
Signed-off-by: Rodrigo Siqueira 


Reviewed-by: Harry Wentland 

Harry


---
  .../display/dc/irq/dcn21/irq_service_dcn21.c   | 17 +
  .../display/dc/irq/dcn30/irq_service_dcn30.c   | 18 ++
  2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 48a3c360174e..bc1249a9858c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -209,6 +209,23 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
mm ## block ## id ## _ ## reg_name
  
+#define SRI_DMUB(reg_name)\

+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \
  
  #define IRQ_REG_ENTRY(block, reg_num, reg1, mask1, reg2, mask2)\

.enable_reg = SRI(reg1, block, reg_num),\
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 68f8f554c925..5af52ad49d7c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -276,6 +276,24 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
  
+#define SRI_DMUB(reg_name)\

+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \
+
  #define dmub_trace_int_entry()\
[DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2 V2] platform/x86: force LPS0 functions for AMD

2021-03-17 Thread Hans de Goede
Hi,

On 3/17/21 3:38 PM, Alex Deucher wrote:
> ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD are supposedly not
> required for AMD platforms, and on some platforms they are
> not even listed in the function mask but at least some HP
> laptops seem to require it to properly support s0ix.
> 
> Based on a patch from Marcin Bachry .
> 
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
> Signed-off-by: Alex Deucher 
> Cc: Marcin Bachry 
> ---
> 
> V2: rework the patch to just fix up the specific problematic
> case by setting the function flags that are missing.

Thanks, the new version looks good to me:

Reviewed-by: Hans de Goede 

Regards,

Hans


> 
>  drivers/acpi/x86/s2idle.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
> index 2d7ddb8a8cb6..482e6b23b21a 100644
> --- a/drivers/acpi/x86/s2idle.c
> +++ b/drivers/acpi/x86/s2idle.c
> @@ -368,6 +368,13 @@ static int lps0_device_attach(struct acpi_device *adev,
>  
>   ACPI_FREE(out_obj);
>  
> + /*
> +  * Some HP laptops require ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD for 
> proper
> +  * S0ix, but don't set the function mask correctly.  Fix that up here.
> +  */
> + if (acpi_s2idle_vendor_amd())
> + lps0_dsm_func_mask |= (1 << ACPI_LPS0_ENTRY_AMD) | (1 << 
> ACPI_LPS0_EXIT_AMD);
> +
>   acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
> lps0_dsm_func_mask);
>  
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Remove unused calls

2021-03-17 Thread Rodrigo Siqueira
Reviewed-by: Rodrigo Siqueira 

On 03/17, Qingqing Zhuo wrote:
> dmub_trace_int_entry() was introduced unintentionally, causing
> compiling issues.
> 
> Fixes: 53e9c0f651421136 ("drm/amd/display: Support vertical interrupt 0 for 
> all dcn ASIC")
> Signed-off-by: Qingqing Zhuo 
> Acked-by: Alex Deucher 
> ---
>  .../gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c  | 8 
>  .../gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c  | 8 
>  2 files changed, 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c 
> b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> index 48a3c360174e..3908ad929176 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> @@ -278,13 +278,6 @@ static const struct irq_source_info_funcs 
> vline0_irq_info_funcs = {
>   .funcs = _irq_info_funcs\
>   }
>  
> -#define dmub_trace_int_entry()\
> - [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
> - IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
> DMCUB_OUTBOX0_READY_INT_EN,\
> - DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
> - .funcs = _trace_irq_info_funcs\
> - }
> -
>  #define vline0_int_entry(reg_num)\
>   [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
>   IRQ_REG_ENTRY(OTG, reg_num,\
> @@ -411,7 +404,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = {
>   vline0_int_entry(3),
>   vline0_int_entry(4),
>   vline0_int_entry(5),
> - dmub_trace_int_entry(),
>  };
>  
>  static const struct irq_service_funcs irq_service_funcs_dcn21 = {
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
> b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
> index 68f8f554c925..4ec6f6ad8c48 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
> @@ -276,13 +276,6 @@ static const struct irq_source_info_funcs 
> vline0_irq_info_funcs = {
>   .funcs = _irq_info_funcs\
>   }
>  
> -#define dmub_trace_int_entry()\
> - [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
> - IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
> DMCUB_OUTBOX0_READY_INT_EN,\
> - DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
> - .funcs = _trace_irq_info_funcs\
> - }
> -
>  #define vline0_int_entry(reg_num)\
>   [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
>   IRQ_REG_ENTRY(OTG, reg_num,\
> @@ -405,7 +398,6 @@ irq_source_info_dcn30[DAL_IRQ_SOURCES_NUMBER] = {
>   vline0_int_entry(3),
>   vline0_int_entry(4),
>   vline0_int_entry(5),
> - dmub_trace_int_entry(),
>  };
>  
>  static const struct irq_service_funcs irq_service_funcs_dcn30 = {
> -- 
> 2.17.1
> 

-- 
Rodrigo Siqueira
https://siqueira.tech


signature.asc
Description: PGP signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Remove unused calls

2021-03-17 Thread Qingqing Zhuo
dmub_trace_int_entry() was introduced unintentionally, causing
compiling issues.

Fixes: 53e9c0f651421136 ("drm/amd/display: Support vertical interrupt 0 for all 
dcn ASIC")
Signed-off-by: Qingqing Zhuo 
Acked-by: Alex Deucher 
---
 .../gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c  | 8 
 .../gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c  | 8 
 2 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 48a3c360174e..3908ad929176 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -278,13 +278,6 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
-#define dmub_trace_int_entry()\
-   [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
-   IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
-   DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
-   .funcs = _trace_irq_info_funcs\
-   }
-
 #define vline0_int_entry(reg_num)\
[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
IRQ_REG_ENTRY(OTG, reg_num,\
@@ -411,7 +404,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = {
vline0_int_entry(3),
vline0_int_entry(4),
vline0_int_entry(5),
-   dmub_trace_int_entry(),
 };
 
 static const struct irq_service_funcs irq_service_funcs_dcn21 = {
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 68f8f554c925..4ec6f6ad8c48 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -276,13 +276,6 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
-#define dmub_trace_int_entry()\
-   [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
-   IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
-   DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
-   .funcs = _trace_irq_info_funcs\
-   }
-
 #define vline0_int_entry(reg_num)\
[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
IRQ_REG_ENTRY(OTG, reg_num,\
@@ -405,7 +398,6 @@ irq_source_info_dcn30[DAL_IRQ_SOURCES_NUMBER] = {
vline0_int_entry(3),
vline0_int_entry(4),
vline0_int_entry(5),
-   dmub_trace_int_entry(),
 };
 
 static const struct irq_service_funcs irq_service_funcs_dcn30 = {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

2021-03-17 Thread Lakha, Bhawanpreet
[AMD Official Use Only - Internal Distribution Only]

Hi Bindu,

dc_allow_idle_optimizations() should be called within mutex_lock(>dc_lock). 
Please call it right after WARN_ON(!dc_commit_state(dm->dc, dc_state)) but 
before unlock().

Bhawan

From: amd-gfx  on behalf of Michel 
Dänzer 
Sent: March 17, 2021 7:37 AM
To: R, Bindu ; amd-gfx@lists.freedesktop.org 

Cc: Deucher, Alexander ; Feng, Kenneth 
; Zhou1, Tao ; Lakha, Bhawanpreet 

Subject: Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

On 2021-03-17 12:49 a.m., Bindu Ramamurthy wrote:
> [Why]
> idle optimization was being disabled after commit.
>
> [How]
> check vblank count for display off and enable idle optimization based on this 
> count.
>
> Signed-off-by: Bindu Ramamurthy 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 553e39f9538c..56a55143ad2d 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -987,7 +987,7 @@ static void event_mall_stutter(struct work_struct *work)
>
>if (vblank_work->enable)
>dm->active_vblank_irq_count++;
> - else
> + else if(dm->active_vblank_irq_count)
>dm->active_vblank_irq_count--;

The commit log should explain why this part is needed.


--
Earthling Michel Dänzer   |   
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fredhat.com%2Fdata=04%7C01%7Cbhawanpreet.lakha%40amd.com%7C11fd0779679742148e2a08d8e938fe34%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515778296313590%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=%2BPsyjDxRCTdR9HL1QSlCraWo7YpFg%2FJT8i%2BSsG%2BQvZE%3Dreserved=0
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cbhawanpreet.lakha%40amd.com%7C11fd0779679742148e2a08d8e938fe34%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515778296313590%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=L4fr6qPeO5rcMi0zg9bk9xLRTKtyVRTJ3LcSPd3Qlyw%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Amdgpu kernel oops and freezing on system suspend and hibernate

2021-03-17 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Please try the latest patch set on this bug:
https://gitlab.freedesktop.org/drm/amd/-/issues/1230

Alex

From: amd-gfx  on behalf of Harvey 

Sent: Wednesday, March 17, 2021 12:20 PM
To: amd-gfx@lists.freedesktop.org 
Subject: Amdgpu kernel oops and freezing on system suspend and hibernate

Hello,

I own a laptop, a MSI Bravo 17 A4DDR/MS-17FK
with Ryzen 7 4800U and hybrid graphics on a Radeon RX 5500M.

DMI: Micro-Star International Co., Ltd. Bravo 17 A4DDR/MS-17FK, BIOS
E17FKAMS.117 10/29/2020

The system does not hibernate, it just freezes. Starting after a reset
it then resumes from the swap partition and gets the system up, but
shortly after that freezes again.

Even suspending is not working properly - on archlinux with kernel
5.11.6 and on 5.12-rc1 I see the following kernel oopses after resume:

The output of dmesg -l err,warn is:

[11020.188925] [ cut here ]
[11020.188929] WARNING: CPU: 0 PID: 7736 at
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c:2574
dc_link_set_backlight_level+0x8a/0xf0 [amdgpu]
[11020.189314] Modules linked in: rfcomm snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi cmac algif_hash
algif_skcipher af_alg bnep intel_rapl_msr intel_rapl_common iwlmvm
snd_hda_intel snd_intel_dspcfg soundwire_intel
soundwire_generic_allocation soundwire_cadence nls_iso8859_1 vfat
mac80211 snd_hda_codec fat edac_mce_amd uvcvideo btusb snd_hda_core
kvm_amd btrtl libarc4 videobuf2_vmalloc btbcm snd_hwdep videobuf2_memops
hid_multitouch soundwire_bus videobuf2_v4l2 btintel pktcdvd iwlwifi
snd_soc_core kvm videobuf2_common bluetooth snd_compress videodev
ac97_bus snd_pcm_dmaengine snd_pcm snd_timer irqbypass msi_wmi
ecdh_generic joydev mousedev cfg80211 mc ecc rapl snd psmouse
snd_rn_pci_acp3x pcspkr sparse_keymap k10temp i2c_piix4 snd_pci_acp3x
soundcore rfkill tpm_crb tpm_tis tpm_tis_core pinctrl_amd i2c_hid
acpi_cpufreq mac_hid soc_button_array vboxnetflt(OE) vboxnetadp(OE)
vboxdrv(OE) usbip_host usbip_core sg fuse crypto_user bpf_preload
ip_tables x_tables
[11020.189400]  ext4 crc32c_generic crc16 mbcache jbd2 sr_mod cdrom uas
usb_storage dm_crypt cbc encrypted_keys dm_mod trusted tpm
crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel
aesni_intel crypto_simd cryptd glue_helper serio_raw ccp xhci_pci
xhci_pci_renesas rng_core wmi video usbhid r8168(OE) amdgpu
drm_ttm_helper ttm gpu_sched i2c_algo_bit drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops cec drm agpgart
[11020.189445] CPU: 0 PID: 7736 Comm: systemd-sleep Tainted: G
  OE 5.11.6-arch1-1 #1
[11020.189450] Hardware name: Micro-Star International Co., Ltd. Bravo
17 A4DDR/MS-17FK, BIOS E17FKAMS.117 10/29/2020
[11020.189453] RIP: 0010:dc_link_set_backlight_level+0x8a/0xf0 [amdgpu]
[11020.189792] Code: 88 03 00 00 31 c0 48 8d 96 f0 01 00 00 48 8b 0a 48
85 c9 74 06 48 3b 59 08 74 20 83 c0 01 48 81 c2 d0 04 00 00 83 f8 06 75
e3 <0f> 0b 45 31 e4 5b 44 89 e0 5d 41 5c 41 5d 41 5e c3 48 98 48 69 c0
[11020.189795] RSP: 0018:c1f003373c38 EFLAGS: 00010246
[11020.189799] RAX: 0006 RBX: 9e244e0ea800 RCX:

[11020.189802] RDX: 9e2582fe1ed0 RSI: 9e2582fe RDI:

[11020.189804] RBP: 9e244e0f R08: 00f9 R09:
9e244323a000
[11020.189806] R10: 9e244323ae40 R11: 01320122 R12:
fa01
[11020.189808] R13:  R14: fa42 R15:
0003
[11020.189810] FS:  7f6219470a40() GS:9e275f60()
knlGS:
[11020.189813] CS:  0010 DS:  ES:  CR0: 80050033
[11020.189815] CR2: 7fb7a8980180 CR3: 000109cae000 CR4:
00350ef0
[11020.189818] Call Trace:
[11020.189828]  amdgpu_dm_backlight_update_status+0xb4/0xc0 [amdgpu]
[11020.190185]  backlight_suspend+0x6a/0x80
[11020.190192]  ? brightness_store+0x80/0x80
[11020.190197]  dpm_run_callback+0x4c/0x150
[11020.190202]  __device_suspend+0x11c/0x4d0
[11020.190205]  dpm_suspend+0xef/0x230
[11020.190209]  dpm_suspend_start+0x77/0x80
[11020.190213]  suspend_devices_and_enter+0x109/0x800
[11020.190219]  pm_suspend.cold+0x329/0x374
[11020.190225]  state_store+0x71/0xd0
[11020.190230]  kernfs_fop_write_iter+0x124/0x1b0
[11020.190236]  new_sync_write+0x159/0x1f0
[11020.190241]  vfs_write+0x1fc/0x2a0
[11020.190245]  ksys_write+0x67/0xe0
[11020.190249]  do_syscall_64+0x33/0x40
[11020.190255]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[11020.190261] RIP: 0033:0x7f6219de10f7
[11020.190265] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f
1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[11020.190268] RSP: 002b:7fff7ae91318 EFLAGS: 0246 ORIG_RAX:
0001
[11020.190272] RAX: ffda RBX: 

Amdgpu kernel oops and freezing on system suspend and hibernate

2021-03-17 Thread Harvey

Hello,

I own a laptop, a MSI Bravo 17 A4DDR/MS-17FK
with Ryzen 7 4800U and hybrid graphics on a Radeon RX 5500M.

DMI: Micro-Star International Co., Ltd. Bravo 17 A4DDR/MS-17FK, BIOS 
E17FKAMS.117 10/29/2020


The system does not hibernate, it just freezes. Starting after a reset 
it then resumes from the swap partition and gets the system up, but 
shortly after that freezes again.


Even suspending is not working properly - on archlinux with kernel 
5.11.6 and on 5.12-rc1 I see the following kernel oopses after resume:


The output of dmesg -l err,warn is:

[11020.188925] [ cut here ]
[11020.188929] WARNING: CPU: 0 PID: 7736 at 
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c:2574 
dc_link_set_backlight_level+0x8a/0xf0 [amdgpu]
[11020.189314] Modules linked in: rfcomm snd_hda_codec_realtek 
snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi cmac algif_hash 
algif_skcipher af_alg bnep intel_rapl_msr intel_rapl_common iwlmvm 
snd_hda_intel snd_intel_dspcfg soundwire_intel 
soundwire_generic_allocation soundwire_cadence nls_iso8859_1 vfat 
mac80211 snd_hda_codec fat edac_mce_amd uvcvideo btusb snd_hda_core 
kvm_amd btrtl libarc4 videobuf2_vmalloc btbcm snd_hwdep videobuf2_memops 
hid_multitouch soundwire_bus videobuf2_v4l2 btintel pktcdvd iwlwifi 
snd_soc_core kvm videobuf2_common bluetooth snd_compress videodev 
ac97_bus snd_pcm_dmaengine snd_pcm snd_timer irqbypass msi_wmi 
ecdh_generic joydev mousedev cfg80211 mc ecc rapl snd psmouse 
snd_rn_pci_acp3x pcspkr sparse_keymap k10temp i2c_piix4 snd_pci_acp3x 
soundcore rfkill tpm_crb tpm_tis tpm_tis_core pinctrl_amd i2c_hid 
acpi_cpufreq mac_hid soc_button_array vboxnetflt(OE) vboxnetadp(OE) 
vboxdrv(OE) usbip_host usbip_core sg fuse crypto_user bpf_preload 
ip_tables x_tables
[11020.189400]  ext4 crc32c_generic crc16 mbcache jbd2 sr_mod cdrom uas 
usb_storage dm_crypt cbc encrypted_keys dm_mod trusted tpm 
crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel 
aesni_intel crypto_simd cryptd glue_helper serio_raw ccp xhci_pci 
xhci_pci_renesas rng_core wmi video usbhid r8168(OE) amdgpu 
drm_ttm_helper ttm gpu_sched i2c_algo_bit drm_kms_helper syscopyarea 
sysfillrect sysimgblt fb_sys_fops cec drm agpgart
[11020.189445] CPU: 0 PID: 7736 Comm: systemd-sleep Tainted: G 
 OE 5.11.6-arch1-1 #1
[11020.189450] Hardware name: Micro-Star International Co., Ltd. Bravo 
17 A4DDR/MS-17FK, BIOS E17FKAMS.117 10/29/2020

[11020.189453] RIP: 0010:dc_link_set_backlight_level+0x8a/0xf0 [amdgpu]
[11020.189792] Code: 88 03 00 00 31 c0 48 8d 96 f0 01 00 00 48 8b 0a 48 
85 c9 74 06 48 3b 59 08 74 20 83 c0 01 48 81 c2 d0 04 00 00 83 f8 06 75 
e3 <0f> 0b 45 31 e4 5b 44 89 e0 5d 41 5c 41 5d 41 5e c3 48 98 48 69 c0

[11020.189795] RSP: 0018:c1f003373c38 EFLAGS: 00010246
[11020.189799] RAX: 0006 RBX: 9e244e0ea800 RCX: 

[11020.189802] RDX: 9e2582fe1ed0 RSI: 9e2582fe RDI: 

[11020.189804] RBP: 9e244e0f R08: 00f9 R09: 
9e244323a000
[11020.189806] R10: 9e244323ae40 R11: 01320122 R12: 
fa01
[11020.189808] R13:  R14: fa42 R15: 
0003
[11020.189810] FS:  7f6219470a40() GS:9e275f60() 
knlGS:

[11020.189813] CS:  0010 DS:  ES:  CR0: 80050033
[11020.189815] CR2: 7fb7a8980180 CR3: 000109cae000 CR4: 
00350ef0

[11020.189818] Call Trace:
[11020.189828]  amdgpu_dm_backlight_update_status+0xb4/0xc0 [amdgpu]
[11020.190185]  backlight_suspend+0x6a/0x80
[11020.190192]  ? brightness_store+0x80/0x80
[11020.190197]  dpm_run_callback+0x4c/0x150
[11020.190202]  __device_suspend+0x11c/0x4d0
[11020.190205]  dpm_suspend+0xef/0x230
[11020.190209]  dpm_suspend_start+0x77/0x80
[11020.190213]  suspend_devices_and_enter+0x109/0x800
[11020.190219]  pm_suspend.cold+0x329/0x374
[11020.190225]  state_store+0x71/0xd0
[11020.190230]  kernfs_fop_write_iter+0x124/0x1b0
[11020.190236]  new_sync_write+0x159/0x1f0
[11020.190241]  vfs_write+0x1fc/0x2a0
[11020.190245]  ksys_write+0x67/0xe0
[11020.190249]  do_syscall_64+0x33/0x40
[11020.190255]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[11020.190261] RIP: 0033:0x7f6219de10f7
[11020.190265] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 
1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 
05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[11020.190268] RSP: 002b:7fff7ae91318 EFLAGS: 0246 ORIG_RAX: 
0001
[11020.190272] RAX: ffda RBX: 0004 RCX: 
7f6219de10f7
[11020.190275] RDX: 0004 RSI: 7fff7ae91400 RDI: 
0004
[11020.190276] RBP: 7fff7ae91400 R08: 55e3a1261b70 R09: 
7f6219e770c0
[11020.190278] R10: 7f6219e76fc0 R11: 0246 R12: 
0004
[11020.190280] R13: 55e3a125d3c0 R14: 0004 R15: 
7f6219eb3700

[11020.190284] ---[ end trace 

Re: [PATCH] drm/amd/display: Add irq register entry for dmub

2021-03-17 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Acked-by: Alex Deucher 

From: Siqueira, Rodrigo 
Sent: Wednesday, March 17, 2021 12:21 PM
To: Deucher, Alexander ; Wentland, Harry 

Cc: amd-gfx@lists.freedesktop.org ; Lin, Wayne 
; Chiu, Solomon 
Subject: [PATCH] drm/amd/display: Add irq register entry for dmub

DCN2.1 and DCN3.0 are missing some macros that register irq entries
which cause compilation errors. This commit introduces those macros and
fix the compilation error.

Cc: Wayne Lin 
Cc: Solomon Chiu 
Fixes: 53e9c0f651421136 ("drm/amd/display: Support vertical interrupt 0 for all 
dcn ASIC")
Signed-off-by: Rodrigo Siqueira 
---
 .../display/dc/irq/dcn21/irq_service_dcn21.c   | 17 +
 .../display/dc/irq/dcn30/irq_service_dcn30.c   | 18 ++
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 48a3c360174e..bc1249a9858c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -209,6 +209,23 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
 BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 mm ## block ## id ## _ ## reg_name

+#define SRI_DMUB(reg_name)\
+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \

 #define IRQ_REG_ENTRY(block, reg_num, reg1, mask1, reg2, mask2)\
 .enable_reg = SRI(reg1, block, reg_num),\
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 68f8f554c925..5af52ad49d7c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -276,6 +276,24 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
 .funcs = _irq_info_funcs\
 }

+#define SRI_DMUB(reg_name)\
+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \
+
 #define dmub_trace_int_entry()\
 [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
 IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Add irq register entry for dmub

2021-03-17 Thread Rodrigo Siqueira
DCN2.1 and DCN3.0 are missing some macros that register irq entries
which cause compilation errors. This commit introduces those macros and
fix the compilation error.

Cc: Wayne Lin 
Cc: Solomon Chiu 
Fixes: 53e9c0f651421136 ("drm/amd/display: Support vertical interrupt 0 for all 
dcn ASIC")
Signed-off-by: Rodrigo Siqueira 
---
 .../display/dc/irq/dcn21/irq_service_dcn21.c   | 17 +
 .../display/dc/irq/dcn30/irq_service_dcn30.c   | 18 ++
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 48a3c360174e..bc1249a9858c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -209,6 +209,23 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
mm ## block ## id ## _ ## reg_name
 
+#define SRI_DMUB(reg_name)\
+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \
 
 #define IRQ_REG_ENTRY(block, reg_num, reg1, mask1, reg2, mask2)\
.enable_reg = SRI(reg1, block, reg_num),\
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 68f8f554c925..5af52ad49d7c 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -276,6 +276,24 @@ static const struct irq_source_info_funcs 
vline0_irq_info_funcs = {
.funcs = _irq_info_funcs\
}
 
+#define SRI_DMUB(reg_name)\
+   BASE(mm ## reg_name ## _BASE_IDX) + \
+   mm ## reg_name
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+   .enable_reg = SRI_DMUB(reg1),\
+   .enable_mask = \
+   reg1 ## __ ## mask1 ## _MASK,\
+   .enable_value = {\
+   reg1 ## __ ## mask1 ## _MASK,\
+   ~reg1 ## __ ## mask1 ## _MASK \
+   },\
+   .ack_reg = SRI_DMUB(reg2),\
+   .ack_mask = \
+   reg2 ## __ ## mask2 ## _MASK,\
+   .ack_value = \
+   reg2 ## __ ## mask2 ## _MASK \
+
 #define dmub_trace_int_entry()\
[DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, 
DMCUB_OUTBOX0_READY_INT_EN,\
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/ttm: Fix memory leak userptr pages

2021-03-17 Thread Daniel Gomez
If userptr pages have been pinned but not bounded,
they remain uncleared.

Signed-off-by: Daniel Gomez 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9fd2157b133a..50c2b4827c13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1162,13 +1162,13 @@ static void amdgpu_ttm_backend_unbind(struct 
ttm_bo_device *bdev,
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
 
-   if (!gtt->bound)
-   return;
-
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
 
+   if (!gtt->bound)
+   return;
+
if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
return;
 
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Try YCbCr420 color when YCbCr444 fails

2021-03-17 Thread Werner Sembach
When encoder validation of a display mode fails, retry with less bandwidth
heavy YCbCr420 color mode, if available. This enables some HDMI 1.4 setups
to support 4k60Hz output, which previously failed silently.

On some setups, while the monitor and the gpu support display modes with
pixel clocks of up to 600MHz, the link encoder might not. This prevents
YCbCr444 and RGB encoding for 4k60Hz, but YCbCr420 encoding might still be
possible. However, which color mode is used is decided before the link
encoder capabilities are checked. This patch fixes the problem by retrying
to find a display mode with YCbCr420 enforced and using it, if it is
valid.

Signed-off-by: Werner Sembach 
Cc: 
---

>From c9398160caf4ff20e63b8ba3a4366d6ef95c4ac3 Mon Sep 17 00:00:00 2001
From: Werner Sembach 
Date: Wed, 17 Mar 2021 12:52:22 +0100
Subject: [PATCH] Retry forcing YCbCr420 color on failed encoder validation

---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 961abf1cf040..2d16389b5f1e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5727,6 +5727,15 @@ create_validate_stream_for_sink(struct 
amdgpu_dm_connector *aconnector,
 
} while (stream == NULL && requested_bpc >= 6);
 
+   if (dc_result == DC_FAIL_ENC_VALIDATE && 
!aconnector->force_yuv420_output) {
+   DRM_DEBUG_KMS("Retry forcing YCbCr420 encoding\n");
+
+   aconnector->force_yuv420_output = true;
+   stream = create_validate_stream_for_sink(aconnector, drm_mode,
+   dm_state, old_stream);
+   aconnector->force_yuv420_output = false;
+   }
+
return stream;
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V2] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Alex Deucher
Applied.  Thanks!

Alex


On Wed, Mar 17, 2021 at 11:18 AM Christian König
 wrote:
>
> Am 17.03.21 um 15:48 schrieb Chen Li:
> >
> > kvmalloc_array + __GFP_ZERO is the same with kvcalloc.
> >
> > As for p->chunks, it will be used in:
> > ```
> > if (ib_chunk->kdata)
> >   memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
> > 4);
> > ```
> >
> > If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
> > e.g.,
> > ```
> > Unable to handle kernel paging request at virtual address 0001
> > ...
> > pc is at memcpy+0x84/0x250
> > ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
> > ```
> >
> > after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
> > Fixes: 3fcb4f01deed ("drm/radeon: Use kvmalloc for CS chunks")
> > Signed-off-by: Chen Li 
>
> Reviewed-by: Christian König 
>
> > ---
> >
> > changelog:
> >v1->v2: add Fixes: tag
> >
> >   drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
> >   1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
> > b/drivers/gpu/drm/radeon/radeon_cs.c
> > index fb736ef9f9aa..059431689c2d 100644
> > --- a/drivers/gpu/drm/radeon/radeon_cs.c
> > +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> > @@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct 
> > radeon_cs_parser *p)
> >   p->dma_reloc_idx = 0;
> >   /* FIXME: we assume that each relocs use 4 dwords */
> >   p->nrelocs = chunk->length_dw / 4;
> > - p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
> > - GFP_KERNEL | __GFP_ZERO);
> > + p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
> > + GFP_KERNEL);
> >   if (p->relocs == NULL) {
> >   return -ENOMEM;
> >   }
> > @@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, 
> > void *data)
> >   }
> >   p->cs_flags = 0;
> >   p->nchunks = cs->num_chunks;
> > - p->chunks = kvmalloc_array(p->nchunks, sizeof(struct 
> > radeon_cs_chunk), GFP_KERNEL);
> > + p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
> > GFP_KERNEL);
> >   if (p->chunks == NULL) {
> >   return -ENOMEM;
> >   }
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V2] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Christian König

Am 17.03.21 um 15:48 schrieb Chen Li:


kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
Fixes: 3fcb4f01deed ("drm/radeon: Use kvmalloc for CS chunks")
Signed-off-by: Chen Li 


Reviewed-by: Christian König 


---

changelog:
   v1->v2: add Fixes: tag

  drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: check fb of primary plane

2021-03-17 Thread Alex Deucher
On Wed, Mar 17, 2021 at 10:05 AM Harry Wentland  wrote:
>
>
>
> On 2021-03-16 5:50 p.m., Sefa Eyeoglu wrote:
> > Sometimes the primary plane might not be initialized (yet), which
> > causes dm_check_crtc_cursor to divide by zero.
> > Apparently a weird state before a S3-suspend causes the aforementioned
> > divide-by-zero error when resuming from S3.  This was explained in
> > bug 212293 on Bugzilla.
> >
> > To avoid this divide-by-zero error we check if the primary plane's fb
> > isn't NULL.  If it's NULL the src_w and src_h attributes will be 0,
> > which would cause a divide-by-zero.
> >
> > This fixes Bugzilla report 212293
> > https://bugzilla.kernel.org/show_bug.cgi?id=212293>>
> > Fixes: 12f4849a1cfd69f3 ("drm/amd/display: check cursor scaling")
> > Signed-off-by: Sefa Eyeoglu 
>
> Thanks for your patch.
>
> It is
> Reviewed-by: Harry Wentland 

Applied.  Thanks!

Alex

>
> Harry
>
> > ---
> >   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 573cf17262da4e11..fbb1ac223ccbb62a 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -9267,7 +9267,8 @@ static int dm_check_crtc_cursor(struct 
> > drm_atomic_state *state,
> >
> >   new_cursor_state = drm_atomic_get_new_plane_state(state, 
> > crtc->cursor);
> >   new_primary_state = drm_atomic_get_new_plane_state(state, 
> > crtc->primary);
> > - if (!new_cursor_state || !new_primary_state || !new_cursor_state->fb) 
> > {
> > + if (!new_cursor_state || !new_primary_state ||
> > + !new_cursor_state->fb || !new_primary_state->fb) {
> >   return 0;
> >   }
> >
> >
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-17 Thread Zhang, Jack (Jian)
[AMD Official Use Only - Internal Distribution Only]

Hi,Andrey,

Good catch,I will expore this corner case and give feedback soon~

Best,
Jack


From: Grodzovsky, Andrey 
Sent: Wednesday, March 17, 2021 10:50:59 PM
To: Christian König ; Zhang, Jack (Jian) 
; dri-de...@lists.freedesktop.org 
; amd-gfx@lists.freedesktop.org 
; Koenig, Christian ; 
Liu, Monk ; Deng, Emily ; Rob Herring 
; Tomeu Vizoso ; Steven Price 

Subject: Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

I actually have a race condition concern here - see bellow -

On 2021-03-17 3:43 a.m., Christian König wrote:
> I was hoping Andrey would take a look since I'm really busy with other
> work right now.
>
> Regards,
> Christian.
>
> Am 17.03.21 um 07:46 schrieb Zhang, Jack (Jian):
>> Hi, Andrey/Crhistian and Team,
>>
>> I didn't receive the reviewer's message from maintainers on panfrost
>> driver for several days.
>> Due to this patch is urgent for my current working project.
>> Would you please help to give some review ideas?
>>
>> Many Thanks,
>> Jack
>> -Original Message-
>> From: Zhang, Jack (Jian)
>> Sent: Tuesday, March 16, 2021 3:20 PM
>> To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>> Koenig, Christian ; Grodzovsky, Andrey
>> ; Liu, Monk ; Deng,
>> Emily ; Rob Herring ; Tomeu
>> Vizoso ; Steven Price 
>> Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid
>> memleak
>>
>> [AMD Public Use]
>>
>> Ping
>>
>> -Original Message-
>> From: Zhang, Jack (Jian)
>> Sent: Monday, March 15, 2021 1:24 PM
>> To: Jack Zhang ;
>> dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>> Koenig, Christian ; Grodzovsky, Andrey
>> ; Liu, Monk ; Deng,
>> Emily ; Rob Herring ; Tomeu
>> Vizoso ; Steven Price 
>> Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid
>> memleak
>>
>> [AMD Public Use]
>>
>> Hi, Rob/Tomeu/Steven,
>>
>> Would you please help to review this patch for panfrost driver?
>>
>> Thanks,
>> Jack Zhang
>>
>> -Original Message-
>> From: Jack Zhang 
>> Sent: Monday, March 15, 2021 1:21 PM
>> To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>> Koenig, Christian ; Grodzovsky, Andrey
>> ; Liu, Monk ; Deng,
>> Emily 
>> Cc: Zhang, Jack (Jian) 
>> Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak
>>
>> re-insert Bailing jobs to avoid memory leak.
>>
>> V2: move re-insert step to drm/scheduler logic
>> V3: add panfrost's return value for bailing jobs in case it hits the
>> memleak issue.
>>
>> Signed-off-by: Jack Zhang 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 8 ++--
>>   drivers/gpu/drm/panfrost/panfrost_job.c| 4 ++--
>>   drivers/gpu/drm/scheduler/sched_main.c | 8 +++-
>>   include/drm/gpu_scheduler.h| 1 +
>>   5 files changed, 19 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 79b9cc73763f..86463b0f936e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -4815,8 +4815,10 @@ int amdgpu_device_gpu_recover(struct
>> amdgpu_device *adev,
>>   job ? job->base.id : -1);
>> /* even we skipped this reset, still need to set the job
>> to guilty */
>> -if (job)
>> +if (job) {
>>   drm_sched_increase_karma(>base);
>> +r = DRM_GPU_SCHED_STAT_BAILING;
>> +}
>>   goto skip_recovery;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index 759b34799221..41390bdacd9e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -34,6 +34,7 @@ static enum drm_gpu_sched_stat
>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   struct amdgpu_job *job = to_amdgpu_job(s_job);
>>   struct amdgpu_task_info ti;
>>   struct amdgpu_device *adev = ring->adev;
>> +int ret;
>> memset(, 0, sizeof(struct amdgpu_task_info));
>>   @@ -52,8 +53,11 @@ static enum drm_gpu_sched_stat
>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>> ti.process_name, ti.tgid, ti.task_name, ti.pid);
>> if (amdgpu_device_should_recover_gpu(ring->adev)) {
>> -amdgpu_device_gpu_recover(ring->adev, job);
>> -return DRM_GPU_SCHED_STAT_NOMINAL;
>> +ret = amdgpu_device_gpu_recover(ring->adev, job);
>> +if (ret == DRM_GPU_SCHED_STAT_BAILING)
>> +return DRM_GPU_SCHED_STAT_BAILING;
>> +else
>> +return DRM_GPU_SCHED_STAT_NOMINAL;
>>   } else {
>>   drm_sched_suspend_timeout(>sched);
>>   if (amdgpu_sriov_vf(adev))
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c
>> b/drivers/gpu/drm/panfrost/panfrost_job.c
>> index 

[PATCH V2] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Chen Li



kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
Fixes: 3fcb4f01deed ("drm/radeon: Use kvmalloc for CS chunks")
Signed-off-by: Chen Li 
---

changelog:
  v1->v2: add Fixes: tag

 drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}
-- 
2.30.1





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V2] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Christian König

Am 17.03.21 um 15:48 schrieb Chen Li:


kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
Fixes: 3fcb4f01deed ("drm/radeon: Use kvmalloc for CS chunks")
Signed-off-by: Chen Li 


Reviewed-by: Christian König 


---

changelog:
   v1->v2: add Fixes: tag

  drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-17 Thread Andrey Grodzovsky

I actually have a race condition concern here - see bellow -

On 2021-03-17 3:43 a.m., Christian König wrote:
I was hoping Andrey would take a look since I'm really busy with other 
work right now.


Regards,
Christian.

Am 17.03.21 um 07:46 schrieb Zhang, Jack (Jian):

Hi, Andrey/Crhistian and Team,

I didn't receive the reviewer's message from maintainers on panfrost 
driver for several days.

Due to this patch is urgent for my current working project.
Would you please help to give some review ideas?

Many Thanks,
Jack
-Original Message-
From: Zhang, Jack (Jian)
Sent: Tuesday, March 16, 2021 3:20 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
Koenig, Christian ; Grodzovsky, Andrey 
; Liu, Monk ; Deng, 
Emily ; Rob Herring ; Tomeu 
Vizoso ; Steven Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid 
memleak


[AMD Public Use]

Ping

-Original Message-
From: Zhang, Jack (Jian)
Sent: Monday, March 15, 2021 1:24 PM
To: Jack Zhang ; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
Koenig, Christian ; Grodzovsky, Andrey 
; Liu, Monk ; Deng, 
Emily ; Rob Herring ; Tomeu 
Vizoso ; Steven Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid 
memleak


[AMD Public Use]

Hi, Rob/Tomeu/Steven,

Would you please help to review this patch for panfrost driver?

Thanks,
Jack Zhang

-Original Message-
From: Jack Zhang 
Sent: Monday, March 15, 2021 1:21 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
Koenig, Christian ; Grodzovsky, Andrey 
; Liu, Monk ; Deng, 
Emily 

Cc: Zhang, Jack (Jian) 
Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

re-insert Bailing jobs to avoid memory leak.

V2: move re-insert step to drm/scheduler logic
V3: add panfrost's return value for bailing jobs in case it hits the 
memleak issue.


Signed-off-by: Jack Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 8 ++--
  drivers/gpu/drm/panfrost/panfrost_job.c    | 4 ++--
  drivers/gpu/drm/scheduler/sched_main.c | 8 +++-
  include/drm/gpu_scheduler.h    | 1 +
  5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 79b9cc73763f..86463b0f936e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4815,8 +4815,10 @@ int amdgpu_device_gpu_recover(struct 
amdgpu_device *adev,

  job ? job->base.id : -1);
    /* even we skipped this reset, still need to set the job 
to guilty */

-    if (job)
+    if (job) {
  drm_sched_increase_karma(>base);
+    r = DRM_GPU_SCHED_STAT_BAILING;
+    }
  goto skip_recovery;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 759b34799221..41390bdacd9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -34,6 +34,7 @@ static enum drm_gpu_sched_stat 
amdgpu_job_timedout(struct drm_sched_job *s_job)

  struct amdgpu_job *job = to_amdgpu_job(s_job);
  struct amdgpu_task_info ti;
  struct amdgpu_device *adev = ring->adev;
+    int ret;
    memset(, 0, sizeof(struct amdgpu_task_info));
  @@ -52,8 +53,11 @@ static enum drm_gpu_sched_stat 
amdgpu_job_timedout(struct drm_sched_job *s_job)

    ti.process_name, ti.tgid, ti.task_name, ti.pid);
    if (amdgpu_device_should_recover_gpu(ring->adev)) {
-    amdgpu_device_gpu_recover(ring->adev, job);
-    return DRM_GPU_SCHED_STAT_NOMINAL;
+    ret = amdgpu_device_gpu_recover(ring->adev, job);
+    if (ret == DRM_GPU_SCHED_STAT_BAILING)
+    return DRM_GPU_SCHED_STAT_BAILING;
+    else
+    return DRM_GPU_SCHED_STAT_NOMINAL;
  } else {
  drm_sched_suspend_timeout(>sched);
  if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c

index 6003cfeb1322..e2cb4f32dae1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -444,7 +444,7 @@ static enum drm_gpu_sched_stat 
panfrost_job_timedout(struct drm_sched_job

   * spurious. Bail out.
   */
  if (dma_fence_is_signaled(job->done_fence))
-    return DRM_GPU_SCHED_STAT_NOMINAL;
+    return DRM_GPU_SCHED_STAT_BAILING;
    dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, 
status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",

  js,
@@ -456,7 +456,7 @@ static enum drm_gpu_sched_stat 
panfrost_job_timedout(struct drm_sched_job

    /* Scheduler is already stopped, nothing to do. */
  if (!panfrost_scheduler_stop(>js->queue[js], sched_job))
-    return DRM_GPU_SCHED_STAT_NOMINAL;
+    return DRM_GPU_SCHED_STAT_BAILING;
  

[PATCH] drm/amdgpu/display/dm: add missing parameter documentation

2021-03-17 Thread Alex Deucher
Added a new parameter and forgot to update the documentation.

Fixes: b6f91fc183f7 ("drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT 
interrupt work")
Signed-off-by: Alex Deucher 
Cc: Stephen Rothwell 
Cc: Xiaogang Chen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index d3c687d07ee6..a682b070f12c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -73,6 +73,7 @@
  * @handler_arg: Argument passed to the handler when triggered
  * @dm: DM which this handler belongs to
  * @irq_source: DC interrupt source that this handler is registered for
+ * @work: work struct
  */
 struct amdgpu_dm_irq_handler_data {
struct list_head list;
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] platform/x86: Add missing LPS0 functions for AMD

2021-03-17 Thread Alex Deucher
These are supposedly not required for AMD platforms,
but at least some HP laptops seem to require it to
properly turn off the keyboard backlight.

Based on a patch from Marcin Bachry .

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
Reviewed-by: Hans de Goede 
Signed-off-by: Alex Deucher 
Cc: Marcin Bachry 
---
 drivers/acpi/x86/s2idle.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 2b69536cdccb..2d7ddb8a8cb6 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -42,6 +42,8 @@ static const struct acpi_device_id lps0_device_ids[] = {
 
 /* AMD */
 #define ACPI_LPS0_DSM_UUID_AMD  "e3f32452-febc-43ce-9039-932122d37721"
+#define ACPI_LPS0_ENTRY_AMD 2
+#define ACPI_LPS0_EXIT_AMD  3
 #define ACPI_LPS0_SCREEN_OFF_AMD4
 #define ACPI_LPS0_SCREEN_ON_AMD 5
 
@@ -408,6 +410,7 @@ int acpi_s2idle_prepare_late(void)
 
if (acpi_s2idle_vendor_amd()) {
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD);
+   acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD);
} else {
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
@@ -422,6 +425,7 @@ void acpi_s2idle_restore_early(void)
return;
 
if (acpi_s2idle_vendor_amd()) {
+   acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD);
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD);
} else {
acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2 V2] platform/x86: force LPS0 functions for AMD

2021-03-17 Thread Alex Deucher
ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD are supposedly not
required for AMD platforms, and on some platforms they are
not even listed in the function mask but at least some HP
laptops seem to require it to properly support s0ix.

Based on a patch from Marcin Bachry .

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
Signed-off-by: Alex Deucher 
Cc: Marcin Bachry 
---

V2: rework the patch to just fix up the specific problematic
case by setting the function flags that are missing.

 drivers/acpi/x86/s2idle.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 2d7ddb8a8cb6..482e6b23b21a 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -368,6 +368,13 @@ static int lps0_device_attach(struct acpi_device *adev,
 
ACPI_FREE(out_obj);
 
+   /*
+* Some HP laptops require ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD for 
proper
+* S0ix, but don't set the function mask correctly.  Fix that up here.
+*/
+   if (acpi_s2idle_vendor_amd())
+   lps0_dsm_func_mask |= (1 << ACPI_LPS0_ENTRY_AMD) | (1 << 
ACPI_LPS0_EXIT_AMD);
+
acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
  lps0_dsm_func_mask);
 
-- 
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: check fb of primary plane

2021-03-17 Thread Harry Wentland




On 2021-03-16 5:50 p.m., Sefa Eyeoglu wrote:

Sometimes the primary plane might not be initialized (yet), which
causes dm_check_crtc_cursor to divide by zero.
Apparently a weird state before a S3-suspend causes the aforementioned
divide-by-zero error when resuming from S3.  This was explained in
bug 212293 on Bugzilla.

To avoid this divide-by-zero error we check if the primary plane's fb
isn't NULL.  If it's NULL the src_w and src_h attributes will be 0,
which would cause a divide-by-zero.

This fixes Bugzilla report 212293
https://bugzilla.kernel.org/show_bug.cgi?id=212293>> 
Fixes: 12f4849a1cfd69f3 ("drm/amd/display: check cursor scaling")

Signed-off-by: Sefa Eyeoglu 


Thanks for your patch.

It is
Reviewed-by: Harry Wentland 

Harry


---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 573cf17262da4e11..fbb1ac223ccbb62a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9267,7 +9267,8 @@ static int dm_check_crtc_cursor(struct drm_atomic_state 
*state,
  
  	new_cursor_state = drm_atomic_get_new_plane_state(state, crtc->cursor);

new_primary_state = drm_atomic_get_new_plane_state(state, 
crtc->primary);
-   if (!new_cursor_state || !new_primary_state || !new_cursor_state->fb) {
+   if (!new_cursor_state || !new_primary_state ||
+   !new_cursor_state->fb || !new_primary_state->fb) {
return 0;
}
  



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Fix memory leak

2021-03-17 Thread Alex Deucher
On Wed, Mar 17, 2021 at 5:56 AM xinhui pan  wrote:
>
> drm_gem_object_put() should be paired with drm_gem_object_lookup().
>
> All gem objs are saved in fb->base.obj[]. Need put the old first before
> assign a new obj.
>
> Trigger VRAM leak by running command below
> $ service gdm restart
>
> Signed-off-by: xinhui pan 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index bebed0f307a1..46dafea8da8b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -955,8 +955,9 @@ int amdgpu_display_framebuffer_init(struct drm_device 
> *dev,
> }
>
> for (i = 1; i < rfb->base.format->num_planes; ++i) {
> +   drm_gem_object_get(rfb->base.obj[0]);
> +   drm_gem_object_put(rfb->base.obj[i]);
> rfb->base.obj[i] = rfb->base.obj[0];
> -   drm_gem_object_get(rfb->base.obj[i]);
> }
>
> return 0;
> @@ -1002,6 +1003,7 @@ amdgpu_display_user_framebuffer_create(struct 
> drm_device *dev,
> return ERR_PTR(ret);
> }
>
> +   drm_gem_object_put(obj);
> return _fb->base;
>  }
>
> --
> 2.25.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

2021-03-17 Thread R, Bindu
[AMD Public Use]

Thanks Guchun, yes this would require CONFIG_DRM_AMD_DC_DCN flag, will update 
it.

Regards,
Bindu

From: Chen, Guchun 
Sent: Tuesday, March 16, 2021 11:32 PM
To: Feng, Kenneth ; R, Bindu ; 
amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Zhou1, Tao 
; Lakha, Bhawanpreet ; R, Bindu 

Subject: RE: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

[AMD Public Use]

+/* Allow idle optimization when vblank count is 0 for display off */
+if (dm->active_vblank_irq_count == 0)
+   dc_allow_idle_optimizations(dm->dc,true);
+

Above part needs to be guarded by CONFIG_DRM_AMD_DC_DCN?

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Feng, Kenneth
Sent: Wednesday, March 17, 2021 9:45 AM
To: R, Bindu ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Zhou1, Tao 
; Lakha, Bhawanpreet ; R, Bindu 

Subject: RE: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Kenneth Feng 


-Original Message-
From: amd-gfx  On Behalf Of Bindu 
Ramamurthy
Sent: Wednesday, March 17, 2021 7:50 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Lakha, Bhawanpreet 
; Zhou1, Tao ; Feng, Kenneth 
; R, Bindu 
Subject: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

[CAUTION: External Email]

[Why]
idle optimization was being disabled after commit.

[How]
check vblank count for display off and enable idle optimization based on this 
count.

Signed-off-by: Bindu Ramamurthy 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 553e39f9538c..56a55143ad2d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -987,7 +987,7 @@ static void event_mall_stutter(struct work_struct *work)

if (vblank_work->enable)
dm->active_vblank_irq_count++;
-   else
+   else if(dm->active_vblank_irq_count)
dm->active_vblank_irq_count--;

dc_allow_idle_optimizations(
@@ -8694,7 +8694,10 @@ static void amdgpu_dm_atomic_commit_tail(struct 
drm_atomic_state *state)
WARN_ON(!dc_commit_state(dm->dc, dc_state));
mutex_unlock(>dc_lock);
}
-
+/* Allow idle optimization when vblank count is 0 for display off */
+if (dm->active_vblank_irq_count == 0)
+   dc_allow_idle_optimizations(dm->dc,true);
+
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);

--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cguchun.chen%40amd.com%7Cb53f164f787e4da3725a08d8e8e6472c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515423028336026%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=xZouQssY4uo%2FohKAZBCdej9gKFsggBExTfQ3Ddz8D%2BQ%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cguchun.chen%40amd.com%7Cb53f164f787e4da3725a08d8e8e6472c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515423028336026%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=xZouQssY4uo%2FohKAZBCdej9gKFsggBExTfQ3Ddz8D%2BQ%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Chen Li
On Wed, 17 Mar 2021 17:19:11 +0800,
Chen Li wrote:
> 
> On Wed, 17 Mar 2021 15:55:47 +0800,
> Christian König wrote:
> > 
> > Am 17.03.21 um 07:22 schrieb Chen Li:
> > > kvmalloc_array + __GFP_ZERO is the same with kvcalloc.
> > > 
> > > As for p->chunks, it will be used in:
> > > ```
> > > if (ib_chunk->kdata)
> > >   memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
> > > 4);
> > > ```
> > > 
> > > If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere 
> > > else, e.g.,
> > > ```
> > > Unable to handle kernel paging request at virtual address 0001
> > > ...
> > > pc is at memcpy+0x84/0x250
> > > ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
> > > ```
> > > 
> > > after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
> > 
> > NAK to zeroing the chunks array.
> > 
> > That array should be fully initialized with data before using it, otherwise 
> > we
> > have a much more serious bug and zeroing it out only papers over the real 
> > issue.
> > 
> > How did you trigger the NULL pointer deref above?
> 
> Hi, Christian, thanks for reply! From radeon_cs_parser_init:
> ```
>   if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
>   if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
> 
> /** chenli: chunks[0] come here and continue! **/
> 
>   continue;
>   }
> 
>   p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), 
> GFP_KERNEL);
> ```
> In my case, chunks[0] is not allocated because it is just get continued, so 
> it's not
> wired that kdata in "memcpy(parser->ib.ptr, ib_chunk->kdata, 
> ib_chunk->length_dw * 4);"
> trigger the invalid address. 
> 

By the ways, chunks were allocated with kcalloc before 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=3fcb4f01deedfa290e903e030956b8e1a5cb764f,
which do zero the chunks array, that's why this error never happen before.

> > 
> > Thanks,
> > Christian.
> > 
> > > Signed-off-by: Chen Li 
> > > ---
> > >   drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
> > >   1 file changed, 3 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
> > > b/drivers/gpu/drm/radeon/radeon_cs.c
> > > index fb736ef9f9aa..059431689c2d 100644
> > > --- a/drivers/gpu/drm/radeon/radeon_cs.c
> > > +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> > > @@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct 
> > > radeon_cs_parser *p)
> > >   p->dma_reloc_idx = 0;
> > >   /* FIXME: we assume that each relocs use 4 dwords */
> > >   p->nrelocs = chunk->length_dw / 4;
> > > - p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
> > > - GFP_KERNEL | __GFP_ZERO);
> > > + p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
> > > + GFP_KERNEL);
> > >   if (p->relocs == NULL) {
> > >   return -ENOMEM;
> > >   }
> > > @@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, 
> > > void *data)
> > >   }
> > >   p->cs_flags = 0;
> > >   p->nchunks = cs->num_chunks;
> > > - p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
> > > GFP_KERNEL);
> > > + p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
> > > GFP_KERNEL);
> > >   if (p->chunks == NULL) {
> > >   return -ENOMEM;
> > >   }
> > 
> > 
> > 
> 
> Regards,
>   Chen Li
> 
> 

Regards,
  Chen Li


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] platform/x86: Add missing LPS0 functions for AMD

2021-03-17 Thread Hans de Goede
Hi,

On 3/16/21 8:46 PM, Alex Deucher wrote:
> These are supposedly not required for AMD platforms,
> but at least some HP laptops seem to require it to
> properly turn off the keyboard backlight.
> 
> Based on a patch from Marcin Bachry .
> 
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
> Signed-off-by: Alex Deucher 
> Cc: Marcin Bachry 

Thanks, patch looks good to me:

Reviewed-by: Hans de Goede 

Regards,

Hans


> ---
>  drivers/acpi/x86/s2idle.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
> index 2b69536cdccb..2d7ddb8a8cb6 100644
> --- a/drivers/acpi/x86/s2idle.c
> +++ b/drivers/acpi/x86/s2idle.c
> @@ -42,6 +42,8 @@ static const struct acpi_device_id lps0_device_ids[] = {
>  
>  /* AMD */
>  #define ACPI_LPS0_DSM_UUID_AMD  "e3f32452-febc-43ce-9039-932122d37721"
> +#define ACPI_LPS0_ENTRY_AMD 2
> +#define ACPI_LPS0_EXIT_AMD  3
>  #define ACPI_LPS0_SCREEN_OFF_AMD4
>  #define ACPI_LPS0_SCREEN_ON_AMD 5
>  
> @@ -408,6 +410,7 @@ int acpi_s2idle_prepare_late(void)
>  
>   if (acpi_s2idle_vendor_amd()) {
>   acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD);
> + acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD);
>   } else {
>   acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
>   acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
> @@ -422,6 +425,7 @@ void acpi_s2idle_restore_early(void)
>   return;
>  
>   if (acpi_s2idle_vendor_amd()) {
> + acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD);
>   acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD);
>   } else {
>   acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Chen Li
On Wed, 17 Mar 2021 15:55:47 +0800,
Christian König wrote:
> 
> Am 17.03.21 um 07:22 schrieb Chen Li:
> > kvmalloc_array + __GFP_ZERO is the same with kvcalloc.
> > 
> > As for p->chunks, it will be used in:
> > ```
> > if (ib_chunk->kdata)
> > memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
> > 4);
> > ```
> > 
> > If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
> > e.g.,
> > ```
> > Unable to handle kernel paging request at virtual address 0001
> > ...
> > pc is at memcpy+0x84/0x250
> > ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
> > ```
> > 
> > after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
> 
> NAK to zeroing the chunks array.
> 
> That array should be fully initialized with data before using it, otherwise we
> have a much more serious bug and zeroing it out only papers over the real 
> issue.
> 
> How did you trigger the NULL pointer deref above?

Hi, Christian, thanks for reply! From radeon_cs_parser_init:
```
if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))

/** chenli: chunks[0] come here and continue! **/

continue;
}

p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), 
GFP_KERNEL);
```
In my case, chunks[0] is not allocated because it is just get continued, so 
it's not
wired that kdata in "memcpy(parser->ib.ptr, ib_chunk->kdata, 
ib_chunk->length_dw * 4);"
trigger the invalid address. 

> 
> Thanks,
> Christian.
> 
> > Signed-off-by: Chen Li 
> > ---
> >   drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
> >   1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
> > b/drivers/gpu/drm/radeon/radeon_cs.c
> > index fb736ef9f9aa..059431689c2d 100644
> > --- a/drivers/gpu/drm/radeon/radeon_cs.c
> > +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> > @@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct 
> > radeon_cs_parser *p)
> > p->dma_reloc_idx = 0;
> > /* FIXME: we assume that each relocs use 4 dwords */
> > p->nrelocs = chunk->length_dw / 4;
> > -   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
> > -   GFP_KERNEL | __GFP_ZERO);
> > +   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
> > +   GFP_KERNEL);
> > if (p->relocs == NULL) {
> > return -ENOMEM;
> > }
> > @@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, 
> > void *data)
> > }
> > p->cs_flags = 0;
> > p->nchunks = cs->num_chunks;
> > -   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
> > GFP_KERNEL);
> > +   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
> > GFP_KERNEL);
> > if (p->chunks == NULL) {
> > return -ENOMEM;
> > }
> 
> 
> 

Regards,
  Chen Li


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] platform/x86: force LPS0 functions for AMD

2021-03-17 Thread Hans de Goede
Hi,

On 3/16/21 8:46 PM, Alex Deucher wrote:
> ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD are supposedly not
> required for AMD platforms, and on some platforms they are
> not even listed in the function mask but at least some HP
> laptops seem to require it to properly support s0ix.
> 
> Based on a patch from Marcin Bachry .
> 
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
> Signed-off-by: Alex Deucher 
> Cc: Marcin Bachry 
> ---
>  drivers/acpi/x86/s2idle.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
> index 2d7ddb8a8cb6..dc3cc021125e 100644
> --- a/drivers/acpi/x86/s2idle.c
> +++ b/drivers/acpi/x86/s2idle.c
> @@ -317,11 +317,16 @@ static void lpi_check_constraints(void)
>   }
>  }
>  
> +static bool acpi_s2idle_vendor_amd(void)
> +{
> + return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
> +}
> +
>  static void acpi_sleep_run_lps0_dsm(unsigned int func)
>  {
>   union acpi_object *out_obj;
>  
> - if (!(lps0_dsm_func_mask & (1 << func)))
> + if (!acpi_s2idle_vendor_amd() && !(lps0_dsm_func_mask & (1 << func)))
>   return;
>  
>   out_obj = acpi_evaluate_dsm(lps0_device_handle, _dsm_guid, rev_id, 
> func, NULL);

Skipping the dsm_func_mask feels a bit wrong to me. The commit message talks
about there being a need to unconditional make the calls in the case of the
ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD calls. Maybe instead add a 
"skip_func_mask"
boolean parameter to acpi_sleep_run_lps0_dsm() and set that to false everywhere
except for the 2 ACPI_LPS0_ENTRY_AMD/ACPI_LPS0_EXIT_AMD calls ?

This way we can control when to skip the check on a call by call basis, rather
then always skipping it on all AMD systems.

Regards,

Hans


> @@ -331,11 +336,6 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func)
> func, out_obj ? "successful" : "failed");
>  }
>  
> -static bool acpi_s2idle_vendor_amd(void)
> -{
> - return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
> -}
> -
>  static int lps0_device_attach(struct acpi_device *adev,
> const struct acpi_device_id *not_used)
>  {
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

2021-03-17 Thread Michel Dänzer
On 2021-03-17 12:49 a.m., Bindu Ramamurthy wrote:
> [Why]
> idle optimization was being disabled after commit.
> 
> [How]
> check vblank count for display off and enable idle optimization based on this 
> count.
> 
> Signed-off-by: Bindu Ramamurthy 
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 553e39f9538c..56a55143ad2d 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -987,7 +987,7 @@ static void event_mall_stutter(struct work_struct *work)
>  
>   if (vblank_work->enable)
>   dm->active_vblank_irq_count++;
> - else
> + else if(dm->active_vblank_irq_count)
>   dm->active_vblank_irq_count--;

The commit log should explain why this part is needed.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/1] drm/amd/display: use GFP_ATOMIC in dcn20_resource_construct

2021-03-17 Thread Nirmoy Das
Replace GFP_KERNEL with GFP_ATOMIC as dcn20_resource_construct()
can't sleep.

Partially fixes: https://bugzilla.kernel.org/show_bug.cgi?id=212311
as dcn20_resource_construct() also calls into SMU functions which does
mutex_lock().

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/display/dc/dce/dce_abm.c  |  2 +-
 drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c |  6 ++---
 .../gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c |  2 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 26 +--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 4e87e70237e3..874b132fe1d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -283,7 +283,7 @@ struct abm *dce_abm_create(
const struct dce_abm_shift *abm_shift,
const struct dce_abm_mask *abm_mask)
 {
-   struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_KERNEL);
+   struct dce_abm *abm_dce = kzalloc(sizeof(*abm_dce), GFP_ATOMIC);
 
if (abm_dce == NULL) {
BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 4f864501e046..8cd841320ded 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -1133,7 +1133,7 @@ struct dmcu *dcn10_dmcu_create(
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
 {
-   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
+   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
 
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
@@ -1154,7 +1154,7 @@ struct dmcu *dcn20_dmcu_create(
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
 {
-   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
+   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
 
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
@@ -1175,7 +1175,7 @@ struct dmcu *dcn21_dmcu_create(
const struct dce_dmcu_shift *dmcu_shift,
const struct dce_dmcu_mask *dmcu_mask)
 {
-   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_KERNEL);
+   struct dce_dmcu *dmcu_dce = kzalloc(sizeof(*dmcu_dce), GFP_ATOMIC);
 
if (dmcu_dce == NULL) {
BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
index 62cc2651e00c..8774406120fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
@@ -112,7 +112,7 @@ struct dccg *dccg2_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask)
 {
-   struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+   struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_ATOMIC);
struct dccg *base;
 
if (dccg_dcn == NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 2307b3517821..25f8dd5db080 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1106,7 +1106,7 @@ struct dpp *dcn20_dpp_create(
uint32_t inst)
 {
struct dcn20_dpp *dpp =
-   kzalloc(sizeof(struct dcn20_dpp), GFP_KERNEL);
+   kzalloc(sizeof(struct dcn20_dpp), GFP_ATOMIC);
 
if (!dpp)
return NULL;
@@ -1124,7 +1124,7 @@ struct input_pixel_processor *dcn20_ipp_create(
struct dc_context *ctx, uint32_t inst)
 {
struct dcn10_ipp *ipp =
-   kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL);
+   kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC);
 
if (!ipp) {
BREAK_TO_DEBUGGER();
@@ -1141,7 +1141,7 @@ struct output_pixel_processor *dcn20_opp_create(
struct dc_context *ctx, uint32_t inst)
 {
struct dcn20_opp *opp =
-   kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
+   kzalloc(sizeof(struct dcn20_opp), GFP_ATOMIC);
 
if (!opp) {
BREAK_TO_DEBUGGER();
@@ -1158,7 +1158,7 @@ struct dce_aux *dcn20_aux_engine_create(
uint32_t inst)
 {
struct aux_engine_dce110 *aux_engine =
-   kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
+   kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC);
 
if (!aux_engine)
return NULL;
@@ -1196,7 +1196,7 @@ struct dce_i2c_hw *dcn20_i2c_hw_create(
uint32_t inst)
 {
struct dce_i2c_hw *dce_i2c_hw =
-   kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
+   kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC);
 

[PATCH] drm/amdgpu: Fix memory leak

2021-03-17 Thread xinhui pan
drm_gem_object_put() should be paired with drm_gem_object_lookup().

All gem objs are saved in fb->base.obj[]. Need put the old first before
assign a new obj.

Trigger VRAM leak by running command below
$ service gdm restart

Signed-off-by: xinhui pan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index bebed0f307a1..46dafea8da8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -955,8 +955,9 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
}
 
for (i = 1; i < rfb->base.format->num_planes; ++i) {
+   drm_gem_object_get(rfb->base.obj[0]);
+   drm_gem_object_put(rfb->base.obj[i]);
rfb->base.obj[i] = rfb->base.obj[0];
-   drm_gem_object_get(rfb->base.obj[i]);
}
 
return 0;
@@ -1002,6 +1003,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device 
*dev,
return ERR_PTR(ret);
}
 
+   drm_gem_object_put(obj);
return _fb->base;
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Christian König



Am 17.03.21 um 10:40 schrieb Chen Li:

On Wed, 17 Mar 2021 17:19:11 +0800,
Chen Li wrote:

On Wed, 17 Mar 2021 15:55:47 +0800,
Christian König wrote:

Am 17.03.21 um 07:22 schrieb Chen Li:

kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.

NAK to zeroing the chunks array.

That array should be fully initialized with data before using it, otherwise we
have a much more serious bug and zeroing it out only papers over the real issue.

How did you trigger the NULL pointer deref above?

Hi, Christian, thanks for reply! From radeon_cs_parser_init:
```
if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))

 /** chenli: chunks[0] come here and continue! **/

continue;
}

p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), 
GFP_KERNEL);
```
In my case, chunks[0] is not allocated because it is just get continued, so 
it's not
wired that kdata in "memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);"
trigger the invalid address.
 

By the ways, chunks were allocated with kcalloc before 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Fnext%2Flinux-next.git%2Fcommit%2F%3Fid%3D3fcb4f01deedfa290e903e030956b8e1a5cb764fdata=04%7C01%7Cchristian.koenig%40amd.com%7Ca2386f016a6f40d910a808d8e928a88f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515708138849091%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=ztgRwRsRLV%2FUYqaWvCrQ9WGEEVh6x3a4%2FHZyS%2F%2FttBc%3Dreserved=0,
which do zero the chunks array, that's why this error never happen before.


Ah! I was really wondering why that worked all those years.

We try to avoid kcalloc and similar in new code because it is often used 
as a wildcard to paper over real problems and disables KASAN checks 
etc... But when this is just restoring old behavior it is probably ok.


Please add a Fixes: tag pointing to the original commit which introduced 
the problem so that backporters can handle that properly.


With that done the patch is Reviewed-by: Christian König 



Thanks,
Christian.




Thanks,
Christian.


Signed-off-by: Chen Li 
---
   drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}




Regards,
   Chen Li



Regards,
   Chen Li




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Christian König

Am 17.03.21 um 10:19 schrieb Chen Li:

On Wed, 17 Mar 2021 15:55:47 +0800,
Christian König wrote:

Am 17.03.21 um 07:22 schrieb Chen Li:

kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.

NAK to zeroing the chunks array.

That array should be fully initialized with data before using it, otherwise we
have a much more serious bug and zeroing it out only papers over the real issue.

How did you trigger the NULL pointer deref above?

Hi, Christian, thanks for reply! From radeon_cs_parser_init:
```
if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))

 /** chenli: chunks[0] come here and continue! **/

continue;
}

p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), 
GFP_KERNEL);
```
In my case, chunks[0] is not allocated because it is just get continued, so 
it's not
wired that kdata in "memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);"
trigger the invalid address.


Right, the problem is this memory optimization added ~8 years ago.

We don't set the kdata pointer to NULL in that case, can you please add 
this instead of setting the whole structure to zero?


Thanks,
Christian.

 

Thanks,
Christian.


Signed-off-by: Chen Li 
---
   drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}




Regards,
   Chen Li




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Chen Li


kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.
Signed-off-by: Chen Li 
---
 drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}
-- 
2.30.1



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [RESEND 00/53] Rid GPU from W=1 warnings

2021-03-17 Thread Lee Jones
On Thu, 11 Mar 2021, Lee Jones wrote:

> On Thu, 11 Mar 2021, Daniel Vetter wrote:
> 
> > On Mon, Mar 08, 2021 at 09:19:32AM +, Lee Jones wrote:
> > > On Fri, 05 Mar 2021, Roland Scheidegger wrote:
> > > 
> > > > The vmwgfx ones look all good to me, so for
> > > > 23-53: Reviewed-by: Roland Scheidegger 
> > > > That said, they were already signed off by Zack, so not sure what
> > > > happened here.
> > > 
> > > Yes, they were accepted at one point, then dropped without a reason.
> > > 
> > > Since I rebased onto the latest -next, I had to pluck them back out of
> > > a previous one.
> > 
> > They should show up in linux-next again. We merge patches for next merge
> > window even during the current merge window, but need to make sure they
> > don't pollute linux-next. Occasionally the cut off is wrong so patches
> > show up, and then get pulled again.
> > 
> > Unfortunately especially the 5.12 merge cycle was very wobbly due to some
> > confusion here. But your patches should all be in linux-next again (they
> > are queued up for 5.13 in drm-misc-next, I checked that).
> > 
> > Sorry for the confusion here.
> 
> Oh, I see.  Well so long as they don't get dropped, I'll be happy.
> 
> Thanks for the explanation Daniel

After rebasing today, all of my GPU patches have remained.  Would
someone be kind enough to check that everything is still in order
please?

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: drop extraneous hw_status update

2021-03-17 Thread Quan, Evan
[AMD Public Use]

Reviewed-by: Evan Quan 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Wednesday, March 17, 2021 1:38 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH] drm/amdgpu: drop extraneous hw_status update

We set the same variable a few lines above.  Drop the duplicate setting.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bca4b5aa6407..7aa95eddd521 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2739,7 +2739,6 @@ static int amdgpu_device_ip_suspend_phase2(struct 
amdgpu_device *adev)
}
}
}
-   adev->ip_blocks[i].status.hw = false;
}
 
return 0;
--
2.30.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cevan.quan%40amd.com%7C6f25f4ebe1e64011f07d08d8e8a23f87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637515130869739502%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=%2BrRMUIeJocqIy7GSP4XsSxiOH1T1O0Tuk0gZLRD7ats%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 0/5] amd/display: improve atomic cursor checks

2021-03-17 Thread Simon Ser
On Wednesday, March 17th, 2021 at 9:32 AM, Michel Dänzer  
wrote:

> On 2021-03-17 9:21 a.m., Simon Ser wrote:
> > On Thursday, March 11th, 2021 at 3:13 PM, Michel Dänzer 
> >  wrote:
> >
> >>> I'm not a fan of adding kernel hacks like setting up a transparent FB, 
> >>> when
> >>> user-space can just avoid the failure with atomic test-only commits (and 
> >>> e.g.
> >>> use the overlay to display the cursor image instead of the cursor plane).
> >>
> >> I'm not a fan of requiring each atomic client to handle this complexity.
> >
> > That's just how atomic works though. User-space is expected to incrementally
> > build the atomic request, bailing out if something doesn't work along the 
> > way.
>
> Being unable to disable a plane which is currently enabled is quite different
> from being unable to enable a plane which is currently disabled. How is user
> space supposed to react to that, other than maybe disabling everything and
> starting from scratch?

What I mean by "singular atomic commit" is an atomic commit that tries to
change a single thing, and expects that to just work. Whether it's about
disabling a plane or not doesn't matter much from my point of view. For
instance, maybe scaling is enabled and user-space wants to disable scaling, but
the driver can't? This would be surprising to user-space because "user-space is
just trying to disable $feature", just like the the case we're talking about.

> > Doing it the old way (ie. issuing singular atomic commits, ie. using the 
> > atomic
> > API just like the legacy API is used) won't work in many situations anyways.
>
> This isn't about that, not sure why you keep bringing it up.

Well, a client that incrementally builds the full atomic commit (like Weston
and libliftoff) won't have this issue.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: check fb of primary plane

2021-03-17 Thread Michel Dänzer
On 2021-03-17 9:29 a.m., Simon Ser wrote:
> On Tuesday, March 16th, 2021 at 10:50 PM, Sefa Eyeoglu 
>  wrote:
> 
>> Sometimes the primary plane might not be initialized (yet), which
>> causes dm_check_crtc_cursor to divide by zero.
>> Apparently a weird state before a S3-suspend causes the aforementioned
>> divide-by-zero error when resuming from S3.  This was explained in
>> bug 212293 on Bugzilla.
>>
>> To avoid this divide-by-zero error we check if the primary plane's fb
>> isn't NULL.  If it's NULL the src_w and src_h attributes will be 0,
>> which would cause a divide-by-zero.
>>
>> This fixes Bugzilla report 212293
>> https://bugzilla.kernel.org/show_bug.cgi?id=212293
>>
>> Fixes: 12f4849a1cfd69f3 ("drm/amd/display: check cursor scaling")
>> Signed-off-by: Sefa Eyeoglu 
> 
> Thanks for the fix! In theory we should return -EINVAL here, because we can't
> enable the cursor plane without the primary plane. But that would break the
> legacy API translation layer in DRM core, which expects that planes can always
> be disabled individually.

The core DRM code can deal with being unable to enable the CRTC while the 
primary plane is disabled. If you have evidence to the contrary, I'd like to 
see it.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v2 0/5] amd/display: improve atomic cursor checks

2021-03-17 Thread Michel Dänzer

[ Adding Daniel again ]

On 2021-03-17 9:21 a.m., Simon Ser wrote:
> On Thursday, March 11th, 2021 at 3:13 PM, Michel Dänzer  
> wrote:
> 
>>> I'm not a fan of adding kernel hacks like setting up a transparent FB, when
>>> user-space can just avoid the failure with atomic test-only commits (and 
>>> e.g.
>>> use the overlay to display the cursor image instead of the cursor plane).
>>
>> I'm not a fan of requiring each atomic client to handle this complexity.
> 
> That's just how atomic works though. User-space is expected to incrementally
> build the atomic request, bailing out if something doesn't work along the way.

Being unable to disable a plane which is currently enabled is quite different 
from being unable to enable a plane which is currently disabled. How is user 
space supposed to react to that, other than maybe disabling everything and 
starting from scratch?


> Doing it the old way (ie. issuing singular atomic commits, ie. using the 
> atomic
> API just like the legacy API is used) won't work in many situations anyways.

This isn't about that, not sure why you keep bringing it up.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: check fb of primary plane

2021-03-17 Thread Simon Ser
On Tuesday, March 16th, 2021 at 10:50 PM, Sefa Eyeoglu  
wrote:

> Sometimes the primary plane might not be initialized (yet), which
> causes dm_check_crtc_cursor to divide by zero.
> Apparently a weird state before a S3-suspend causes the aforementioned
> divide-by-zero error when resuming from S3.  This was explained in
> bug 212293 on Bugzilla.
>
> To avoid this divide-by-zero error we check if the primary plane's fb
> isn't NULL.  If it's NULL the src_w and src_h attributes will be 0,
> which would cause a divide-by-zero.
>
> This fixes Bugzilla report 212293
> https://bugzilla.kernel.org/show_bug.cgi?id=212293
>
> Fixes: 12f4849a1cfd69f3 ("drm/amd/display: check cursor scaling")
> Signed-off-by: Sefa Eyeoglu 

Thanks for the fix! In theory we should return -EINVAL here, because we can't
enable the cursor plane without the primary plane. But that would break the
legacy API translation layer in DRM core, which expects that planes can always
be disabled individually.

So this is:

Reviewed-by: Simon Ser 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/4] drm/amdgpu: use amdgpu_bo_dmabuf for shared prime count

2021-03-17 Thread Nirmoy


On 3/17/21 9:15 AM, Christian König wrote:



Am 17.03.21 um 09:13 schrieb Nirmoy:


On 3/17/21 9:06 AM, Christian König wrote:

That whole approach won't work :)

The prime_shared_count is on the exported BO and not on the imported 
one.


So any user BO can be exported.



So any BO from any drm driver is possible. No wonder it worked with 
amdgpu-amdgpu test machine.


OK scrapping the series :/


Try looking into the page table BOs, that has much more potential for 
a cleanup than this single field here.


Christian.



Thanks, checking it.


Nirmoy







Thanks,

Nirmoy




Regards,
Christian.

Am 17.03.21 um 08:47 schrieb Nirmoy Das:

Remove prime_shared_count from base class and use that
the subclass, amdgpu_bo_dmabuf.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
  5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index b5c766998045..04994757cc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -618,7 +618,7 @@ static int amdgpu_cs_parser_bos(struct 
amdgpu_cs_parser *p,

  struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
    /* Make sure we use the exclusive slot for shared BOs */
-    if (bo->prime_shared_count)
+    if (is_amdgpu_bo_dmabuf(bo))
  e->tv.num_shared = 0;
  e->bo_va = amdgpu_vm_bo_find(vm, bo);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e0c4f7c7f1b9..3cf57ea56499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -143,6 +143,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf 
*dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  int r;
  @@ -172,7 +173,7 @@ static int amdgpu_dma_buf_attach(struct 
dma_buf *dmabuf,

  if (r)
  goto out;
  -    bo->prime_shared_count++;
+    dbo->prime_shared_count++;
  amdgpu_bo_unreserve(bo);
  return 0;
  @@ -194,10 +195,11 @@ static void amdgpu_dma_buf_detach(struct 
dma_buf *dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  -    if (attach->dev->driver != adev->dev->driver && 
bo->prime_shared_count)

-    bo->prime_shared_count--;
+    if (attach->dev->driver != adev->dev->driver && 
dbo->prime_shared_count)

+    dbo->prime_shared_count--;
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -431,6 +433,7 @@ amdgpu_dma_buf_create_obj(struct drm_device 
*dev, struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct drm_gem_object *gobj;
  struct amdgpu_bo *bo;
+    struct amdgpu_bo_dmabuf *dbo;
  uint64_t flags = 0;
  int ret;
  @@ -449,10 +452,11 @@ amdgpu_dma_buf_create_obj(struct drm_device 
*dev, struct dma_buf *dma_buf)

  goto error;
    bo = gem_to_amdgpu_bo(gobj);
+    dbo = to_amdgpu_bo_dmabuf(bo);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != _dmabuf_ops)
-    bo->prime_shared_count = 1;
+    dbo->prime_shared_count = 1;
    dma_resv_unlock(resv);
  return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index 5366a806be2b..7cce8aa29fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -783,7 +783,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, 
void *data,

  break;
  }
  case AMDGPU_GEM_OP_SET_PLACEMENT:
-    if (robj->prime_shared_count && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
+    if (is_amdgpu_bo_dmabuf(robj) && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {

  r = -EINVAL;
  amdgpu_bo_unreserve(robj);
  break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index ad615eec1e8c..435bf85991e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -965,7 +965,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  return -EINVAL;
    /* A shared bo cannot be migrated to VRAM */
-    if (bo->prime_shared_count || 

Re: [PATCH v2 0/5] amd/display: improve atomic cursor checks

2021-03-17 Thread Simon Ser
On Thursday, March 11th, 2021 at 3:13 PM, Michel Dänzer  
wrote:

> > I'm not a fan of adding kernel hacks like setting up a transparent FB, when
> > user-space can just avoid the failure with atomic test-only commits (and 
> > e.g.
> > use the overlay to display the cursor image instead of the cursor plane).
>
> I'm not a fan of requiring each atomic client to handle this complexity.

That's just how atomic works though. User-space is expected to incrementally
build the atomic request, bailing out if something doesn't work along the way.
Doing it the old way (ie. issuing singular atomic commits, ie. using the atomic
API just like the legacy API is used) won't work in many situations anyways.

Plus, a library such as libliftoff will just handle all of that complexity for
you.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/4] drm/amdgpu: use amdgpu_bo_dmabuf for shared prime count

2021-03-17 Thread Christian König



Am 17.03.21 um 09:13 schrieb Nirmoy:


On 3/17/21 9:06 AM, Christian König wrote:

That whole approach won't work :)

The prime_shared_count is on the exported BO and not on the imported 
one.


So any user BO can be exported.



So any BO from any drm driver is possible. No wonder it worked with 
amdgpu-amdgpu test machine.


OK scrapping the series :/


Try looking into the page table BOs, that has much more potential for a 
cleanup than this single field here.


Christian.




Thanks,

Nirmoy




Regards,
Christian.

Am 17.03.21 um 08:47 schrieb Nirmoy Das:

Remove prime_shared_count from base class and use that
the subclass, amdgpu_bo_dmabuf.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
  5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index b5c766998045..04994757cc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -618,7 +618,7 @@ static int amdgpu_cs_parser_bos(struct 
amdgpu_cs_parser *p,

  struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
    /* Make sure we use the exclusive slot for shared BOs */
-    if (bo->prime_shared_count)
+    if (is_amdgpu_bo_dmabuf(bo))
  e->tv.num_shared = 0;
  e->bo_va = amdgpu_vm_bo_find(vm, bo);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e0c4f7c7f1b9..3cf57ea56499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -143,6 +143,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf 
*dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  int r;
  @@ -172,7 +173,7 @@ static int amdgpu_dma_buf_attach(struct 
dma_buf *dmabuf,

  if (r)
  goto out;
  -    bo->prime_shared_count++;
+    dbo->prime_shared_count++;
  amdgpu_bo_unreserve(bo);
  return 0;
  @@ -194,10 +195,11 @@ static void amdgpu_dma_buf_detach(struct 
dma_buf *dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  -    if (attach->dev->driver != adev->dev->driver && 
bo->prime_shared_count)

-    bo->prime_shared_count--;
+    if (attach->dev->driver != adev->dev->driver && 
dbo->prime_shared_count)

+    dbo->prime_shared_count--;
    pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -431,6 +433,7 @@ amdgpu_dma_buf_create_obj(struct drm_device 
*dev, struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct drm_gem_object *gobj;
  struct amdgpu_bo *bo;
+    struct amdgpu_bo_dmabuf *dbo;
  uint64_t flags = 0;
  int ret;
  @@ -449,10 +452,11 @@ amdgpu_dma_buf_create_obj(struct drm_device 
*dev, struct dma_buf *dma_buf)

  goto error;
    bo = gem_to_amdgpu_bo(gobj);
+    dbo = to_amdgpu_bo_dmabuf(bo);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != _dmabuf_ops)
-    bo->prime_shared_count = 1;
+    dbo->prime_shared_count = 1;
    dma_resv_unlock(resv);
  return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index 5366a806be2b..7cce8aa29fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -783,7 +783,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, 
void *data,

  break;
  }
  case AMDGPU_GEM_OP_SET_PLACEMENT:
-    if (robj->prime_shared_count && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
+    if (is_amdgpu_bo_dmabuf(robj) && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {

  r = -EINVAL;
  amdgpu_bo_unreserve(robj);
  break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index ad615eec1e8c..435bf85991e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -965,7 +965,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  return -EINVAL;
    /* A shared bo cannot be migrated to VRAM */
-    if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+    if (is_amdgpu_bo_dmabuf(bo) || 

Re: [PATCH 4/4] drm/amdgpu: use amdgpu_bo_dmabuf for shared prime count

2021-03-17 Thread Nirmoy


On 3/17/21 9:06 AM, Christian König wrote:

That whole approach won't work :)

The prime_shared_count is on the exported BO and not on the imported one.

So any user BO can be exported.



So any BO from any drm driver is possible. No wonder it worked with 
amdgpu-amdgpu test machine.


OK scrapping the series :/


Thanks,

Nirmoy




Regards,
Christian.

Am 17.03.21 um 08:47 schrieb Nirmoy Das:

Remove prime_shared_count from base class and use that
the subclass, amdgpu_bo_dmabuf.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
  5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index b5c766998045..04994757cc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -618,7 +618,7 @@ static int amdgpu_cs_parser_bos(struct 
amdgpu_cs_parser *p,

  struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
    /* Make sure we use the exclusive slot for shared BOs */
-    if (bo->prime_shared_count)
+    if (is_amdgpu_bo_dmabuf(bo))
  e->tv.num_shared = 0;
  e->bo_va = amdgpu_vm_bo_find(vm, bo);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index e0c4f7c7f1b9..3cf57ea56499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -143,6 +143,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf 
*dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  int r;
  @@ -172,7 +173,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf 
*dmabuf,

  if (r)
  goto out;
  -    bo->prime_shared_count++;
+    dbo->prime_shared_count++;
  amdgpu_bo_unreserve(bo);
  return 0;
  @@ -194,10 +195,11 @@ static void amdgpu_dma_buf_detach(struct 
dma_buf *dmabuf,

  {
  struct drm_gem_object *obj = dmabuf->priv;
  struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  -    if (attach->dev->driver != adev->dev->driver && 
bo->prime_shared_count)

-    bo->prime_shared_count--;
+    if (attach->dev->driver != adev->dev->driver && 
dbo->prime_shared_count)

+    dbo->prime_shared_count--;
    pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
  pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -431,6 +433,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, 
struct dma_buf *dma_buf)

  struct amdgpu_device *adev = drm_to_adev(dev);
  struct drm_gem_object *gobj;
  struct amdgpu_bo *bo;
+    struct amdgpu_bo_dmabuf *dbo;
  uint64_t flags = 0;
  int ret;
  @@ -449,10 +452,11 @@ amdgpu_dma_buf_create_obj(struct drm_device 
*dev, struct dma_buf *dma_buf)

  goto error;
    bo = gem_to_amdgpu_bo(gobj);
+    dbo = to_amdgpu_bo_dmabuf(bo);
  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
  if (dma_buf->ops != _dmabuf_ops)
-    bo->prime_shared_count = 1;
+    dbo->prime_shared_count = 1;
    dma_resv_unlock(resv);
  return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index 5366a806be2b..7cce8aa29fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -783,7 +783,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, 
void *data,

  break;
  }
  case AMDGPU_GEM_OP_SET_PLACEMENT:
-    if (robj->prime_shared_count && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
+    if (is_amdgpu_bo_dmabuf(robj) && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {

  r = -EINVAL;
  amdgpu_bo_unreserve(robj);
  break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index ad615eec1e8c..435bf85991e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -965,7 +965,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  return -EINVAL;
    /* A shared bo cannot be migrated to VRAM */
-    if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+    if (is_amdgpu_bo_dmabuf(bo) || bo->tbo.base.import_attach) {
  if (domain & AMDGPU_GEM_DOMAIN_GTT)
  domain = AMDGPU_GEM_DOMAIN_GTT;
  else
diff --git 

Re: [PATCH 4/4] drm/amdgpu: use amdgpu_bo_dmabuf for shared prime count

2021-03-17 Thread Christian König

That whole approach won't work :)

The prime_shared_count is on the exported BO and not on the imported one.

So any user BO can be exported.

Regards,
Christian.

Am 17.03.21 um 08:47 schrieb Nirmoy Das:

Remove prime_shared_count from base class and use that
the subclass, amdgpu_bo_dmabuf.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
  5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b5c766998045..04994757cc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -618,7 +618,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
  
  		/* Make sure we use the exclusive slot for shared BOs */

-   if (bo->prime_shared_count)
+   if (is_amdgpu_bo_dmabuf(bo))
e->tv.num_shared = 0;
e->bo_va = amdgpu_vm_bo_find(vm, bo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e0c4f7c7f1b9..3cf57ea56499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -143,6 +143,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
  {
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
  
@@ -172,7 +173,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,

if (r)
goto out;
  
-	bo->prime_shared_count++;

+   dbo->prime_shared_count++;
amdgpu_bo_unreserve(bo);
return 0;
  
@@ -194,10 +195,11 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,

  {
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)

-   bo->prime_shared_count--;
+   if (attach->dev->driver != adev->dev->driver && dbo->prime_shared_count)
+   dbo->prime_shared_count--;
  
  	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);

pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -431,6 +433,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
+   struct amdgpu_bo_dmabuf *dbo;
uint64_t flags = 0;
int ret;
  
@@ -449,10 +452,11 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)

goto error;
  
  	bo = gem_to_amdgpu_bo(gobj);

+   dbo = to_amdgpu_bo_dmabuf(bo);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
if (dma_buf->ops != _dmabuf_ops)
-   bo->prime_shared_count = 1;
+   dbo->prime_shared_count = 1;
  
  	dma_resv_unlock(resv);

return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5366a806be2b..7cce8aa29fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -783,7 +783,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
-   if (robj->prime_shared_count && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
+   if (is_amdgpu_bo_dmabuf(robj) && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
r = -EINVAL;
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ad615eec1e8c..435bf85991e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -965,7 +965,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return -EINVAL;
  
  	/* A shared bo cannot be migrated to VRAM */

-   if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+   if (is_amdgpu_bo_dmabuf(bo) || bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
diff --git 

Re: [PATCH 1/4] drm/amdgpu: introduce struct amdgpu_bo_dmabuf

2021-03-17 Thread Christian König




Am 17.03.21 um 08:47 schrieb Nirmoy Das:

Implement a new struct amdgpu_bo_dmabuf as subclass of
struct amdgpu_bo and a function to create amdgpu_bo_dmabuf
bo.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  9 +++
  2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fdf23e439b42..fa4686fe95c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -727,6 +727,36 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
*ubo_ptr = to_amdgpu_bo_user(bo_ptr);
return r;
  }
+
+/**
+ * amdgpu_bo_create_dmabuf - create an _bo_dmabuf buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @ubo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by prime BO export;
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_dmabuf(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_dmabuf **dbo_ptr)
+{
+   struct amdgpu_bo *bo_ptr;
+   int r;
+
+   bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+   bp->bo_ptr_size = sizeof(struct amdgpu_bo_dmabuf);
+   r = amdgpu_bo_do_create(adev, bp, _ptr);
+   if (r)
+   return r;
+
+   *dbo_ptr = to_amdgpu_bo_dmabuf(bo_ptr);
+   return r;
+}
+
  /**
   * amdgpu_bo_validate - validate an _bo buffer object
   * @bo: pointer to the buffer object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 25411b2c4dd9..4b9339c3f3eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -38,6 +38,7 @@
  #define AMDGPU_BO_MAX_PLACEMENTS  3
  
  #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)

+#define to_amdgpu_bo_dmabuf(abo) container_of((abo), struct amdgpu_bo_dmabuf, 
bo)
  
  struct amdgpu_bo_param {

unsigned long   size;
@@ -119,6 +120,11 @@ struct amdgpu_bo_user {
  
  };
  
+struct amdgpu_bo_dmabuf {

+   struct amdgpu_bobo;


That won't work like this. Imported DMA-bufs are also user BOs and can 
have tilling flags and metadata.


Christian.


+   unsigned intprime_shared_count;
+};
+
  static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object 
*tbo)
  {
return container_of(tbo, struct amdgpu_bo, tbo);
@@ -265,6 +271,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
  int amdgpu_bo_create_user(struct amdgpu_device *adev,
  struct amdgpu_bo_param *bp,
  struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_dmabuf(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_dmabuf **dbo_ptr);
  void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
  int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] radeon: use kvcalloc for relocs and chunks

2021-03-17 Thread Christian König

Am 17.03.21 um 07:22 schrieb Chen Li:

kvmalloc_array + __GFP_ZERO is the same with kvcalloc.

As for p->chunks, it will be used in:
```
if (ib_chunk->kdata)
memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 
4);
```

If chunks doesn't zero out with __GFP_ZERO, it may point to somewhere else, 
e.g.,
```
Unable to handle kernel paging request at virtual address 0001
...
pc is at memcpy+0x84/0x250
ra is at radeon_cs_ioctl+0x368/0xb90 [radeon]
```

after allocating chunks with __GFP_KERNEL/kvcalloc, this bug is fixed.


NAK to zeroing the chunks array.

That array should be fully initialized with data before using it, 
otherwise we have a much more serious bug and zeroing it out only papers 
over the real issue.


How did you trigger the NULL pointer deref above?

Thanks,
Christian.


Signed-off-by: Chen Li 
---
  drivers/gpu/drm/radeon/radeon_cs.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index fb736ef9f9aa..059431689c2d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -93,8 +93,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
-   p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
-   GFP_KERNEL | __GFP_ZERO);
+   p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
+   GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -299,7 +299,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
}
p->cs_flags = 0;
p->nchunks = cs->num_chunks;
-   p->chunks = kvmalloc_array(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
+   p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), 
GFP_KERNEL);
if (p->chunks == NULL) {
return -ENOMEM;
}


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amdgpu: use amdgpu_bo_dmabuf for shared prime count

2021-03-17 Thread Nirmoy Das
Remove prime_shared_count from base class and use that
the subclass, amdgpu_bo_dmabuf.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
 5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b5c766998045..04994757cc9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -618,7 +618,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
/* Make sure we use the exclusive slot for shared BOs */
-   if (bo->prime_shared_count)
+   if (is_amdgpu_bo_dmabuf(bo))
e->tv.num_shared = 0;
e->bo_va = amdgpu_vm_bo_find(vm, bo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e0c4f7c7f1b9..3cf57ea56499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -143,6 +143,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 {
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
 
@@ -172,7 +173,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
if (r)
goto out;
 
-   bo->prime_shared_count++;
+   dbo->prime_shared_count++;
amdgpu_bo_unreserve(bo);
return 0;
 
@@ -194,10 +195,11 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 {
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_bo_dmabuf *dbo = to_amdgpu_bo_dmabuf(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-   if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-   bo->prime_shared_count--;
+   if (attach->dev->driver != adev->dev->driver && dbo->prime_shared_count)
+   dbo->prime_shared_count--;
 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -431,6 +433,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
+   struct amdgpu_bo_dmabuf *dbo;
uint64_t flags = 0;
int ret;
 
@@ -449,10 +452,11 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
goto error;
 
bo = gem_to_amdgpu_bo(gobj);
+   dbo = to_amdgpu_bo_dmabuf(bo);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
if (dma_buf->ops != _dmabuf_ops)
-   bo->prime_shared_count = 1;
+   dbo->prime_shared_count = 1;
 
dma_resv_unlock(resv);
return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5366a806be2b..7cce8aa29fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -783,7 +783,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
-   if (robj->prime_shared_count && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
+   if (is_amdgpu_bo_dmabuf(robj) && (args->value & 
AMDGPU_GEM_DOMAIN_VRAM)) {
r = -EINVAL;
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ad615eec1e8c..435bf85991e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -965,7 +965,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
return -EINVAL;
 
/* A shared bo cannot be migrated to VRAM */
-   if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+   if (is_amdgpu_bo_dmabuf(bo) || bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3d23ad247b1b..2f1abcc4e1e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ 

[PATCH 2/4] drm/amdgpu: use amdgpu_bo_create_dmabuf()

2021-03-17 Thread Nirmoy Das
Modify amdgpu_gem_object_create() to allocate
amdgpu_bo_dmabuf BO for ttm_bo_type_sg.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 311bcdc59eda..5366a806be2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -59,6 +59,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
 {
struct amdgpu_bo *bo;
struct amdgpu_bo_user *ubo;
+   struct amdgpu_bo_dmabuf *dbo;
struct amdgpu_bo_param bp;
int r;
 
@@ -74,11 +75,18 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
bp.domain = initial_domain;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
 
-   r = amdgpu_bo_create_user(adev, , );
+   if (type != ttm_bo_type_sg)
+   r = amdgpu_bo_create_user(adev, , );
+   else
+   r = amdgpu_bo_create_dmabuf(adev, , );
if (r)
return r;
 
-   bo = >bo;
+   if (type != ttm_bo_type_sg)
+   bo = >bo;
+   else
+   bo = >bo;
+
*obj = >tbo.base;
(*obj)->funcs = _gem_object_funcs;
 
-- 
2.30.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 3/4] drm/amdgpu: introduce is_amdgpu_dmabuf()

2021-03-17 Thread Nirmoy Das
Implement is_amdgpu_dmabuf() helper function.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fa4686fe95c4..ad615eec1e8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -120,6 +120,11 @@ bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
return false;
 }
 
+bool is_amdgpu_bo_dmabuf(struct amdgpu_bo *bo)
+{
+   return (bo->tbo.type == ttm_bo_type_sg);
+}
+
 /**
  * amdgpu_bo_placement_from_domain - set buffer's placement
  * @abo: _bo buffer object whose placement is to be set
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 4b9339c3f3eb..3d23ad247b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -274,6 +274,7 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
 int amdgpu_bo_create_dmabuf(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo_dmabuf **dbo_ptr);
+bool is_amdgpu_bo_dmabuf(struct amdgpu_bo *bo);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
-- 
2.30.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/4] drm/amdgpu: introduce struct amdgpu_bo_dmabuf

2021-03-17 Thread Nirmoy Das
Implement a new struct amdgpu_bo_dmabuf as subclass of
struct amdgpu_bo and a function to create amdgpu_bo_dmabuf
bo.

Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  9 +++
 2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fdf23e439b42..fa4686fe95c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -727,6 +727,36 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
*ubo_ptr = to_amdgpu_bo_user(bo_ptr);
return r;
 }
+
+/**
+ * amdgpu_bo_create_dmabuf - create an _bo_dmabuf buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @ubo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by prime BO export;
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_dmabuf(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_dmabuf **dbo_ptr)
+{
+   struct amdgpu_bo *bo_ptr;
+   int r;
+
+   bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+   bp->bo_ptr_size = sizeof(struct amdgpu_bo_dmabuf);
+   r = amdgpu_bo_do_create(adev, bp, _ptr);
+   if (r)
+   return r;
+
+   *dbo_ptr = to_amdgpu_bo_dmabuf(bo_ptr);
+   return r;
+}
+
 /**
  * amdgpu_bo_validate - validate an _bo buffer object
  * @bo: pointer to the buffer object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 25411b2c4dd9..4b9339c3f3eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -38,6 +38,7 @@
 #define AMDGPU_BO_MAX_PLACEMENTS   3
 
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
+#define to_amdgpu_bo_dmabuf(abo) container_of((abo), struct amdgpu_bo_dmabuf, 
bo)
 
 struct amdgpu_bo_param {
unsigned long   size;
@@ -119,6 +120,11 @@ struct amdgpu_bo_user {
 
 };
 
+struct amdgpu_bo_dmabuf {
+   struct amdgpu_bobo;
+   unsigned intprime_shared_count;
+};
+
 static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
 {
return container_of(tbo, struct amdgpu_bo, tbo);
@@ -265,6 +271,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
 int amdgpu_bo_create_user(struct amdgpu_device *adev,
  struct amdgpu_bo_param *bp,
  struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_dmabuf(struct amdgpu_device *adev,
+   struct amdgpu_bo_param *bp,
+   struct amdgpu_bo_dmabuf **dbo_ptr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
   void **cpu_addr);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
-- 
2.30.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-17 Thread Christian König
I was hoping Andrey would take a look since I'm really busy with other 
work right now.


Regards,
Christian.

Am 17.03.21 um 07:46 schrieb Zhang, Jack (Jian):

Hi, Andrey/Crhistian and Team,

I didn't receive the reviewer's message from maintainers on panfrost driver for 
several days.
Due to this patch is urgent for my current working project.
Would you please help to give some review ideas?

Many Thanks,
Jack
-Original Message-
From: Zhang, Jack (Jian)
Sent: Tuesday, March 16, 2021 3:20 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, Christian ; 
Grodzovsky, Andrey ; Liu, Monk ; Deng, Emily 
; Rob Herring ; Tomeu Vizoso ; Steven 
Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

[AMD Public Use]

Ping

-Original Message-
From: Zhang, Jack (Jian)
Sent: Monday, March 15, 2021 1:24 PM
To: Jack Zhang ; dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, Christian 
; Grodzovsky, Andrey ; Liu, Monk ; 
Deng, Emily ; Rob Herring ; Tomeu Vizoso ; 
Steven Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

[AMD Public Use]

Hi, Rob/Tomeu/Steven,

Would you please help to review this patch for panfrost driver?

Thanks,
Jack Zhang

-Original Message-
From: Jack Zhang 
Sent: Monday, March 15, 2021 1:21 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, Christian 
; Grodzovsky, Andrey ; Liu, Monk 
; Deng, Emily 
Cc: Zhang, Jack (Jian) 
Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

re-insert Bailing jobs to avoid memory leak.

V2: move re-insert step to drm/scheduler logic
V3: add panfrost's return value for bailing jobs in case it hits the memleak 
issue.

Signed-off-by: Jack Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 8 ++--
  drivers/gpu/drm/panfrost/panfrost_job.c| 4 ++--
  drivers/gpu/drm/scheduler/sched_main.c | 8 +++-
  include/drm/gpu_scheduler.h| 1 +
  5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 79b9cc73763f..86463b0f936e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4815,8 +4815,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
job ? job->base.id : -1);
  
  		/* even we skipped this reset, still need to set the job to guilty */

-   if (job)
+   if (job) {
drm_sched_increase_karma(>base);
+   r = DRM_GPU_SCHED_STAT_BAILING;
+   }
goto skip_recovery;
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 759b34799221..41390bdacd9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -34,6 +34,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
struct amdgpu_device *adev = ring->adev;
+   int ret;
  
  	memset(, 0, sizeof(struct amdgpu_task_info));
  
@@ -52,8 +53,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)

  ti.process_name, ti.tgid, ti.task_name, ti.pid);
  
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {

-   amdgpu_device_gpu_recover(ring->adev, job);
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   ret = amdgpu_device_gpu_recover(ring->adev, job);
+   if (ret == DRM_GPU_SCHED_STAT_BAILING)
+   return DRM_GPU_SCHED_STAT_BAILING;
+   else
+   return DRM_GPU_SCHED_STAT_NOMINAL;
} else {
drm_sched_suspend_timeout(>sched);
if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 6003cfeb1322..e2cb4f32dae1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -444,7 +444,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
 * spurious. Bail out.
 */
if (dma_fence_is_signaled(job->done_fence))
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   return DRM_GPU_SCHED_STAT_BAILING;
  
  	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",

js,
@@ -456,7 +456,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
  
  	/* Scheduler is already stopped, nothing to do. */

if (!panfrost_scheduler_stop(>js->queue[js], sched_job))
-   return 

RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-17 Thread Zhang, Jack (Jian)
Hi, Andrey/Crhistian and Team,

I didn't receive the reviewer's message from maintainers on panfrost driver for 
several days.
Due to this patch is urgent for my current working project.
Would you please help to give some review ideas?

Many Thanks,
Jack
-Original Message-
From: Zhang, Jack (Jian) 
Sent: Tuesday, March 16, 2021 3:20 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, 
Christian ; Grodzovsky, Andrey 
; Liu, Monk ; Deng, Emily 
; Rob Herring ; Tomeu Vizoso 
; Steven Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

[AMD Public Use]

Ping

-Original Message-
From: Zhang, Jack (Jian) 
Sent: Monday, March 15, 2021 1:24 PM
To: Jack Zhang ; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; Koenig, Christian ; 
Grodzovsky, Andrey ; Liu, Monk ; 
Deng, Emily ; Rob Herring ; Tomeu Vizoso 
; Steven Price 
Subject: RE: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

[AMD Public Use]

Hi, Rob/Tomeu/Steven,

Would you please help to review this patch for panfrost driver?

Thanks,
Jack Zhang

-Original Message-
From: Jack Zhang 
Sent: Monday, March 15, 2021 1:21 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, 
Christian ; Grodzovsky, Andrey 
; Liu, Monk ; Deng, Emily 

Cc: Zhang, Jack (Jian) 
Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

re-insert Bailing jobs to avoid memory leak.

V2: move re-insert step to drm/scheduler logic
V3: add panfrost's return value for bailing jobs in case it hits the memleak 
issue.

Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 8 ++--
 drivers/gpu/drm/panfrost/panfrost_job.c| 4 ++--
 drivers/gpu/drm/scheduler/sched_main.c | 8 +++-
 include/drm/gpu_scheduler.h| 1 +
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 79b9cc73763f..86463b0f936e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4815,8 +4815,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
job ? job->base.id : -1);
 
/* even we skipped this reset, still need to set the job to 
guilty */
-   if (job)
+   if (job) {
drm_sched_increase_karma(>base);
+   r = DRM_GPU_SCHED_STAT_BAILING;
+   }
goto skip_recovery;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 759b34799221..41390bdacd9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -34,6 +34,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
struct amdgpu_device *adev = ring->adev;
+   int ret;
 
memset(, 0, sizeof(struct amdgpu_task_info));
 
@@ -52,8 +53,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
if (amdgpu_device_should_recover_gpu(ring->adev)) {
-   amdgpu_device_gpu_recover(ring->adev, job);
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   ret = amdgpu_device_gpu_recover(ring->adev, job);
+   if (ret == DRM_GPU_SCHED_STAT_BAILING)
+   return DRM_GPU_SCHED_STAT_BAILING;
+   else
+   return DRM_GPU_SCHED_STAT_NOMINAL;
} else {
drm_sched_suspend_timeout(>sched);
if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 6003cfeb1322..e2cb4f32dae1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -444,7 +444,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
 * spurious. Bail out.
 */
if (dma_fence_is_signaled(job->done_fence))
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   return DRM_GPU_SCHED_STAT_BAILING;
 
dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, 
status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
js,
@@ -456,7 +456,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
 
/* Scheduler is already stopped, nothing to do. */
if (!panfrost_scheduler_stop(>js->queue[js], sched_job))
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   return DRM_GPU_SCHED_STAT_BAILING;
 
/* Schedule a reset if there's no reset in progress. */
if