[PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Liu ChengZhe
1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation;
2. Check pointer before release firmware.

v2: use CHIP_SIENNA_CICHLID instead
v3: remove local "bool ret"; fix grammar issue
v4: use my name instead of "root"

Signed-off-by: Liu ChengZhe 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 35 -
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a053b7af0680..7f18286a0cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
psp_memory_training_fini(&adev->psp);
-   release_firmware(adev->psp.sos_fw);
-   adev->psp.sos_fw = NULL;
-   release_firmware(adev->psp.asd_fw);
-   adev->psp.asd_fw = NULL;
-   release_firmware(adev->psp.ta_fw);
-   adev->psp.ta_fw = NULL;
+   if (adev->psp.sos_fw) {
+   release_firmware(adev->psp.sos_fw);
+   adev->psp.sos_fw = NULL;
+   }
+   if (adev->psp.asd_fw) {
+   release_firmware(adev->psp.asd_fw);
+   adev->psp.asd_fw = NULL;
+   }
+   if (adev->psp.ta_fw) {
+   release_firmware(adev->psp.ta_fw);
+   adev->psp.ta_fw = NULL;
+   }
 
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@@ -409,11 +415,28 @@ static int psp_clear_vf_fw(struct psp_context *psp)
return ret;
 }
 
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+   switch (psp->adev->asic_type) {
+   case CHIP_NAVI12:
+   case CHIP_SIENNA_CICHLID:
+   return true;
+   default:
+   return false;
+   }
+}
+
 static int psp_tmr_load(struct psp_context *psp)
 {
int ret;
struct psp_gfx_cmd_resp *cmd;
 
+   /* for Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR
+* (already setup by host driver)
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amdgpu: add interface amdgpu_gfx_init_spm_golden for Navi1x

2020-07-27 Thread Tianci Yin
From: "Tianci.Yin" 

On Navi1x, the SPM golden settings will be lost after GFXOFF enter/exit,
reconfiguration is needed. Make the configuration code as an interface for
future use.

Change-Id: I172f3dc7f59da69b0364052dcad75a9c9aab019e
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 34 ++---
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1e7a2b0997c5..a611e78dd4ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -216,6 +216,7 @@ struct amdgpu_gfx_funcs {
int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
int (*query_ras_error_count) (struct amdgpu_device *adev, void 
*ras_error_status);
void (*reset_ras_error_count) (struct amdgpu_device *adev);
+   void (*init_spm_golden)(struct amdgpu_device *adev);
 };
 
 struct sq_work {
@@ -324,6 +325,7 @@ struct amdgpu_gfx {
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) 
(adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) 
(adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
+#define amdgpu_gfx_init_spm_golden(adev) 
(adev)->gfx.funcs->init_spm_golden((adev))
 
 /**
  * amdgpu_gfx_create_bitmask - create a bitmask
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e89a0f8..da21ad04ac0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3307,6 +3307,29 @@ static void gfx_v10_0_set_kiq_pm4_funcs(struct 
amdgpu_device *adev)
adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
 }
 
+static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
+{
+   switch (adev->asic_type) {
+   case CHIP_NAVI10:
+   soc15_program_register_sequence(adev,
+   
golden_settings_gc_rlc_spm_10_0_nv10,
+   (const 
u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
+   break;
+   case CHIP_NAVI14:
+   soc15_program_register_sequence(adev,
+   
golden_settings_gc_rlc_spm_10_1_nv14,
+   (const 
u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
+   break;
+   case CHIP_NAVI12:
+   soc15_program_register_sequence(adev,
+   
golden_settings_gc_rlc_spm_10_1_2_nv12,
+   (const 
u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
+   break;
+   default:
+   break;
+   }
+}
+
 static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
 {
switch (adev->asic_type) {
@@ -3317,9 +3340,6 @@ static void gfx_v10_0_init_golden_registers(struct 
amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_0_nv10,
(const 
u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
-   soc15_program_register_sequence(adev,
-   
golden_settings_gc_rlc_spm_10_0_nv10,
-   (const 
u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
break;
case CHIP_NAVI14:
soc15_program_register_sequence(adev,
@@ -3328,9 +3348,6 @@ static void gfx_v10_0_init_golden_registers(struct 
amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_nv14,
(const 
u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
-   soc15_program_register_sequence(adev,
-   
golden_settings_gc_rlc_spm_10_1_nv14,
-   (const 
u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
break;
case CHIP_NAVI12:
soc15_program_register_sequence(adev,
@@ -3339,9 +3356,6 @@ static void gfx_v10_0_init_golden_registers(struct 
amdgpu_device *adev)
soc15_program_register_sequence(adev,
golden_settings_gc_10_1_2_nv12,
(const 
u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
-   soc15_program_register_sequence(adev,
-   
golden_settings_gc_rlc_spm_10_1_2_nv12,
-

[PATCH 2/2] drm/amdgpu: reconfigure spm golden settings on Navi1x after GFXOFF exit

2020-07-27 Thread Tianci Yin
From: "Tianci.Yin" 

On Navi1x, the SPM golden settings will be lost after GFXOFF enter/exit,
reconfigure the golden settings after GFXOFF exit.

Change-Id: I9358ba9c65f241c36f8a35916170b19535148ee9
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 55463e7a11e2..5da0436d41e0 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1309,6 +1309,7 @@ static int smu_enable_umd_pstate(void *handle,
 
struct smu_context *smu = (struct smu_context*)(handle);
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+   struct amdgpu_device *adev = smu->adev;
 
if (!smu->is_apu && !smu_dpm_ctx->dpm_context)
return -EINVAL;
@@ -1324,6 +1325,16 @@ static int smu_enable_umd_pstate(void *handle,
amdgpu_device_ip_set_clockgating_state(smu->adev,
   
AMD_IP_BLOCK_TYPE_GFX,
   
AMD_CG_STATE_UNGATE);
+
+   if (adev->asic_type >= CHIP_NAVI10 &&
+   adev->asic_type <= CHIP_NAVI12 &&
+   (adev->pm.pp_feature & PP_GFXOFF_MASK)) {
+   if (adev->gfx.funcs->init_spm_golden) {
+   dev_dbg(adev->dev,"GFXOFF exited, 
re-init SPM golden settings\n");
+   amdgpu_gfx_init_spm_golden(adev);
+   } else
+   dev_warn(adev->dev,"Callback 
init_spm_golden is NULL\n");
+   }
}
} else {
/* exit umd pstate, restore level, enable gfx cg*/
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Liu ChengZhe
From: root 

1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation;
2. Check pointer before release firmware.

v2: use CHIP_SIENNA_CICHLID instead
v3: remove local "bool ret"; fix grammer issue
v4: use my name instead of "root"
Signed-off-by: root 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 35 -
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a053b7af0680..7f18286a0cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
psp_memory_training_fini(&adev->psp);
-   release_firmware(adev->psp.sos_fw);
-   adev->psp.sos_fw = NULL;
-   release_firmware(adev->psp.asd_fw);
-   adev->psp.asd_fw = NULL;
-   release_firmware(adev->psp.ta_fw);
-   adev->psp.ta_fw = NULL;
+   if (adev->psp.sos_fw) {
+   release_firmware(adev->psp.sos_fw);
+   adev->psp.sos_fw = NULL;
+   }
+   if (adev->psp.asd_fw) {
+   release_firmware(adev->psp.asd_fw);
+   adev->psp.asd_fw = NULL;
+   }
+   if (adev->psp.ta_fw) {
+   release_firmware(adev->psp.ta_fw);
+   adev->psp.ta_fw = NULL;
+   }
 
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@@ -409,11 +415,28 @@ static int psp_clear_vf_fw(struct psp_context *psp)
return ret;
 }
 
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+   switch (psp->adev->asic_type) {
+   case CHIP_NAVI12:
+   case CHIP_SIENNA_CICHLID:
+   return true;
+   default:
+   return false;
+   }
+}
+
 static int psp_tmr_load(struct psp_context *psp)
 {
int ret;
struct psp_gfx_cmd_resp *cmd;
 
+   /* for Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR
+* (already setup by host driver)
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10

2020-07-27 Thread Chen, Guchun
[AMD Public Use]

Reviewed-by: Guchun Chen 

Regards,
Guchun

From: Clements, John 
Sent: Tuesday, July 28, 2020 10:56 AM
To: Chen, Guchun ; amd-gfx list 
; Zhang, Hawking 
Subject: RE: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10


[AMD Public Use]

Thank you for the feedback @Chen, Guchun,

I've attached an updated patch with the corrected typo and also an additional 
change to add the new UMC source to the makefile

Thank you,
John Clements

From: Chen, Guchun mailto:guchun.c...@amd.com>>
Sent: Monday, July 27, 2020 4:50 PM
To: Clements, John mailto:john.cleme...@amd.com>>; 
amd-gfx list 
mailto:amd-gfx@lists.freedesktop.org>>; Zhang, 
Hawking mailto:hawking.zh...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10


[AMD Public Use]

One typo in commit subject.

add support for umc 8.7 initialzation and RAS interrupt

s/initialzation /initialization

With this fixed, the patch is:
Reviewed-by: Guchun Chen guchun.c...@amd.com

Regards,
Guchun

From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 On Behalf Of Clements, John
Sent: Monday, July 27, 2020 4:32 PM
To: amd-gfx list 
mailto:amd-gfx@lists.freedesktop.org>>; Zhang, 
Hawking mailto:hawking.zh...@amd.com>>
Subject: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10


[AMD Public Use]

Submitting patch to enable UMC 8.7 GECC functions in GMC v10
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: add umv v8.7 source to makefile

2020-07-27 Thread Zhang, Hawking
[AMD Public Use]

I would suggest you combine the patch with your previous series. and send them 
out for the review if you haven't completed the review yet.

Regards,
Hawking
From: Clements, John 
Sent: Tuesday, July 28, 2020 10:42
To: amd-gfx list ; Zhang, Hawking 

Subject: [PATCH] drm/amdgpu: add umv v8.7 source to makefile


[AMD Public Use]

Submitting patch to add UMC 8.7 source file to makefile
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Kazlauskas, Nicholas

On 2020-07-27 5:32 p.m., Daniel Vetter wrote:

On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk  wrote:


On Monday, July 27, 2020 4:29 PM, Daniel Vetter  wrote:


On Mon, Jul 27, 2020 at 9:28 PM Christian König
 wrote:


Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:

On 2020-07-27 9:39 a.m., Christian König wrote:

Am 27.07.20 um 07:40 schrieb Mazin Rezk:

This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
commits
are requested and the second one finishes before the first.
Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.


Well I only have a one mile high view on this, but why don't you let
the work items execute in order?

That would be better anyway cause this way we don't trigger a cache
line ping pong between CPUs.

Christian.


We use the DRM helpers for managing drm_atomic_commit_state and those
helpers internally push non-blocking commit work into the system
unbound work queue.


Mhm, well if you send those helper atomic commits in the order A,B and
they execute it in the order B,A I would call that a bug :)


The way it works is it pushes all commits into unbound work queue, but
then forces serialization as needed. We do _not_ want e.g. updates on
different CRTC to be serialized, that would result in lots of judder.
And hw is funny enough that there's all kinds of dependencies.

The way you force synchronization is by adding other CRTC state
objects. So if DC is busted and can only handle a single update per
work item, then I guess you always need all CRTC states and everything
will be run in order. But that also totally kills modern multi-screen
compositors. Xorg isn't modern, just in case that's not clear :-)

Lucking at the code it seems like you indeed have only a single dm
state, so yeah global sync is what you'll need as immediate fix, and
then maybe fix up DM to not be quite so silly ... or at least only do
the dm state stuff when really needed.

We could also sprinkle the drm_crtc_commit structure around a bit
(it's the glue that provides the synchronization across commits), but
since your dm state is global just grabbing all crtc states
unconditionally as part of that is probably best.


While we could duplicate a copy of that code with nothing but the
workqueue changed that isn't something I'd really like to maintain
going forward.


I'm not talking about duplicating the code, I'm talking about fixing the
helpers. I don't know that code well, but from the outside it sounds
like a bug there.

And executing work items in the order they are submitted is trivial.

Had anybody pinged Daniel or other people familiar with the helper code
about it?


Yeah something is wrong here, and the fix looks horrible :-)

Aside, I've also seen some recent discussion flare up about
drm_atomic_state_get/put used to paper over some other use-after-free,
but this time related to interrupt handlers. Maybe a few rules about
that:
- dont
- especially not when it's interrupt handlers, because you can't call
drm_atomic_state_put from interrupt handlers.

Instead have an spin_lock_irq to protect the shared date with your
interrupt handler, and _copy_ the date over. This is e.g. what
drm_crtc_arm_vblank_event does.


Nicholas wrote a patch that attempted to resolve the issue by adding every
CRTC into the commit to use use the stall checks. [1] While this forces
synchronisation on commits, it's kind of a hacky method that may take a
toll on performance.

Is it possible to have a DRM helper that forces synchronisation on some
commits without having to add every CRTC into the commit?

Also, is synchronisation really necessary for fast updates in amdgpu?
I'll admit, the idea of eliminating the use-after-free bug by eliminating
the use entirely doesn't seem ideal; but is forcing synchronisation on
these updates that much better?


Well clearing the dc_state pointer here and then allocating another
one in atomic_commit_tail also looks fishy. The proper fix is probably
a lot more involved, but yeah interim fix is to grab all crtc states
iff you also grabbed the dm_atomic_state structure. Real fix is to
only do this when necessary, which pretty much means the dc_state
needs to be somehow split up, or there needs to be some guarantees
about when it's necessary and when not. Otherwise parallel commits on
different CRTC are not possible with the current dc backend code.


Thanks for spending some time to help take a look at this as well.

The DRM documentation (at least at 

[PATCH] drm/amdgpu: add umv v8.7 source to makefile

2020-07-27 Thread Clements, John
[AMD Public Use]

Submitting patch to add UMC 8.7 source file to makefile


0001-drm-amdgpu-add-umv-v8.7-source-to-makefile.patch
Description: 0001-drm-amdgpu-add-umv-v8.7-source-to-makefile.patch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix PSP autoload twice in FLR

2020-07-27 Thread Liu ChengZhe
the block->status.hw = false assignment will overwrite PSP's previous
hw status, which will cause PSP execute resume operation after hw init.

v2: remove the braces

Signed-off-by: Liu ChengZhe 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac97fbd2..5d9affa1d35a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2574,6 +2574,9 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_IH,
};
 
+   for (i = 0; i < adev->num_ip_blocks; i++)
+   adev->ip_blocks[i].status.hw = false;
+
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
int j;
struct amdgpu_ip_block *block;
@@ -2581,7 +2584,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
amdgpu_device *adev)
for (j = 0; j < adev->num_ip_blocks; j++) {
block = &adev->ip_blocks[j];
 
-   block->status.hw = false;
if (block->version->type != ip_order[i] ||
!block->status.valid)
continue;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Add GPU reset SMI event

2020-07-27 Thread Mukul Joshi
Add support for reporting GPU reset events through SMI.

Signed-off-by: Mukul Joshi 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 18 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  |  1 +
 4 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index d5e790f046b4..d788aa24ef3f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -811,6 +811,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
 
+   kfd_smi_event_update_gpu_reset(kfd);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
 
kgd2kfd_suspend(kfd, false);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..4de57923d9f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,24 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
unsigned int smi_event,
rcu_read_unlock();
 }
 
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev)
+{
+   /*
+* GpuReset msg = empty
+* 1 byte event + 1 byte space + 1 byte \n + 1 byte \0 = 4
+*/
+   char fifo_in[4];
+   int len;
+
+   if (list_empty(&dev->smi_clients)) {
+   return;
+   }
+
+   len = snprintf(fifo_in, 4, "%x \n", KFD_SMI_EVENT_GPU_RESET);
+
+   add_event_to_kfifo(dev, KFD_SMI_EVENT_GPU_RESET, fifo_in, len);
+}
+
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..ffdb822d120b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..128b6235b540 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,7 @@ enum kfd_smi_event {
 KFD_SMI_EVENT_NONE = 0, /* not used */
 KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
 KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+   KFD_SMI_EVENT_GPU_RESET = 3,
 };
 
 #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Mazin Rezk
On Monday, July 27, 2020 9:26 AM, Kazlauskas, Nicholas 
 wrote:

> On 2020-07-27 1:40 a.m., Mazin Rezk wrote:
> > This patch fixes a race condition that causes a use-after-free during
> > amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> > are requested and the second one finishes before the first. Essentially,
> > this bug occurs when the following sequence of events happens:
> >
> > 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > deferred to the workqueue.
> >
> > 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > deferred to the workqueue.
> >
> > 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > commit_tail and commit #2 completes, freeing dm_state #1.
> >
> > 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > 1 and dereferences a freelist pointer while setting the context.
> >
> > Since this bug has only been spotted with fast commits, this patch fixes
> > the bug by clearing the dm_state instead of using the old dc_state for
> > fast updates. In addition, since dm_state is only used for its dc_state
> > and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> > removing the dm_state should not have any consequences in fast updates.
> >
> > This use-after-free bug has existed for a while now, but only caused a
> > noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> > freelist pointer to middle of object") moving the freelist pointer from
> > dm_state->base (which was unused) to dm_state->context (which is
> > dereferenced).
> >
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast 
> > updates")
> > Reported-by: Duncan <1i5t5.dun...@cox.net>
> > Signed-off-by: Mazin Rezk 
> > ---
> >   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
> >   1 file changed, 27 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 86ffa0c2880f..710edc70e37e 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device 
> > *dev,
> >  * the same resource. If we have a new DC context as part of
> >  * the DM atomic state from validation we need to free it and
> >  * retain the existing one instead.
> > +*
> > +* Furthermore, since the DM atomic state only contains the DC
> > +* context and can safely be annulled, we can free the state
> > +* and clear the associated private object now to free
> > +* some memory and avoid a possible use-after-free later.
> >  */
> > -   struct dm_atomic_state *new_dm_state, *old_dm_state;
> >
> > -   new_dm_state = dm_atomic_get_new_state(state);
> > -   old_dm_state = dm_atomic_get_old_state(state);
> > +   for (i = 0; i < state->num_private_objs; i++) {
> > +   struct drm_private_obj *obj = 
> > state->private_objs[i].ptr;
> >
> > -   if (new_dm_state && old_dm_state) {
> > -   if (new_dm_state->context)
> > -   dc_release_state(new_dm_state->context);
> > +   if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > +   int j = state->num_private_objs-1;
> >
> > -   new_dm_state->context = old_dm_state->context;
> > +   dm_atomic_destroy_state(obj,
> > +   state->private_objs[i].state);
> > +
> > +   /* If i is not at the end of the array then the
> > +* last element needs to be moved to where i was
> > +* before the array can safely be truncated.
> > +*/
> > +   if (i != j)
> > +   state->private_objs[i] =
> > +   state->private_objs[j];
> >
> > -   if (old_dm_state->context)
> > -   dc_retain_state(old_dm_state->context);
> > +   state->private_objs[j].ptr = NULL;
> > +   state->private_objs[j].state = NULL;
> > +   state->private_objs[j].old_state = NULL;
> > +   state->private_objs[j].new_state = NULL;
> > +
> > +   state->num_private_objs = j;
> > +   break;
> > +   }
>
> In the bug report itself I mentioned that I don't really like hacking
> around the DRM core for resolving this patch but to go into more
> specifics, it's really two issues of code 

Re: [PATCH] drm/amdkfd: Replace bitmask with event idx in SMI event msg

2020-07-27 Thread Felix Kuehling
Am 2020-07-26 um 5:24 p.m. schrieb Mukul Joshi:
> Event bitmask is a 64-bit mask with only 1 bit set. Sending this
> event bitmask in KFD SMI event message is both wasteful of memory
> and potentially limiting to only 64 events. Instead send event
> index in SMI event message.

Please add a statement here, that for the two event types defined so
far, this change does not break the ABI. The new index will be identical
to the mask used before.


>
> Signed-off-by: Mukul Joshi 
> Suggested-by: Felix Kuehling 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 24 +++--
>  include/uapi/linux/kfd_ioctl.h  | 10 ++---
>  2 files changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index 86c2c3e97944..4d4b6e3ab697 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -149,7 +149,7 @@ static int kfd_smi_ev_release(struct inode *inode, struct 
> file *filep)
>   return 0;
>  }
>  
> -static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long 
> smi_event,
> +static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
> char *event_msg, int len)
>  {
>   struct kfd_smi_client *client;
> @@ -157,14 +157,15 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
> unsigned long long smi_event
>   rcu_read_lock();
>  
   list_for_each_entry_rcu(client, &dev->smi_clients, list) {
> - if (!(READ_ONCE(client->events) & smi_event))
> + if (!(READ_ONCE(client->events) &
> + KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
>   continue;
  spin_lock(&client->lock);
  if (kfifo_avail(&client->fifo) >= len) {
  kfifo_in(&client->fifo, event_msg, len);
  wake_up_all(&client->wait_queue);
>   } else {
> - pr_debug("smi_event(EventID: %llu): no space left\n",
> + pr_debug("smi_event(EventID: %u): no space left\n",
>   smi_event);
>   }
  spin_unlock(&client->lock);
> @@ -180,21 +181,21 @@ void kfd_smi_event_update_thermal_throttling(struct 
> kfd_dev *dev,
>   /*
>* ThermalThrottle msg = throttle_bitmask(8):
>*   thermal_interrupt_count(16):
> -  * 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
> +  * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
>* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
> -  * 1 byte \0 = 44
> +  * 1 byte \0 = 29
>*/
> - char fifo_in[44];
> + char fifo_in[29];
>   int len;
>  
  if (list_empty(&dev->smi_clients))
>   return;
>  
> - len = snprintf(fifo_in, 44, "%x %x:%llx\n",
> + len = snprintf(fifo_in, 29, "%x %x:%llx\n",
>  KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
   atomic64_read(&adev->smu.throttle_int_counter));
>  
> - add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
> + add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
>  }
>  
>  void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
> @@ -202,9 +203,10 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
> uint16_t pasid)
>   struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
>   struct amdgpu_task_info task_info;
>   /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
> - /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
> + /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
> +  * 1 byte \0 = 29
>*/
> - char fifo_in[43];
> + char fifo_in[29];
>   int len;
>  
   if (list_empty(&dev->smi_clients))
> @@ -216,7 +218,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
> uint16_t pasid)
>   if (!task_info.pid)
>   return;
>  
> - len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
> + len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
>   task_info.pid, task_info.task_name);
>  
>   add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index df6c7a43aadc..796f836ba773 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -449,9 +449,13 @@ struct kfd_ioctl_import_dmabuf_args {
>  /*
>   * KFD SMI(System Management Interface) events
>   */
> -/* Event type (defined by bitmask) */
> -#define KFD_SMI_EVENT_VMFAULT0x0001
> -#define KFD_SMI_EVENT_THERMAL_THROTTLE   0x0002
> +enum kfd_smi_event {
> +KFD_SMI_EVENT_NONE = 0, /* not used */
> +KFD_SMI_EVENT_VMFAULT = 1, /* 

Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-27 Thread Felix Kuehling
Am 2020-07-27 um 6:47 a.m. schrieb Monk Liu:
> what:
> the MQD's save and restore of kernel compute queues cost lots of clocks
> during world switch which impacts a lot to multi-VF performance
>
> how:
> introduce a paramter to control the number of kernel compute queues to
> avoid performance drop if there is no kernel compute queue needed
>
> notes:
> this paramter only affects gfx 8/9/10
>
> TODO:
> in the future we will let hypervisor driver to set this paramter
> automatically thus no need for user to configure it through
> modprobe in virtual machine
>
> Signed-off-by: Monk Liu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 
> +++---
>  7 files changed, 71 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e97c088..71a3d6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>  #ifdef CONFIG_DRM_AMDGPU_CIK
>  extern int amdgpu_cik_support;
>  #endif
> +extern int amdgpu_num_kcq_user_set;
>  
>  #define AMDGPU_VM_MAX_NUM_CTX4096
>  #define AMDGPU_SG_THRESHOLD  (256*1024*1024)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac9..18b93ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
> amdgpu_device *adev)
>  
>   amdgpu_gmc_tmz_set(adev);
>  
> + if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
> + amdgpu_num_kcq_user_set = 8;
> + dev_warn(adev->dev, "set KCQ number to 8 due to invalid parameter 
> provided by user\n");
> + }
> +
>   return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6291f5f..03a94e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>  int amdgpu_force_asic_type = -1;
>  int amdgpu_tmz = 0;
>  int amdgpu_reset_method = -1; /* auto */
> +int amdgpu_num_kcq_user_set = 8;
>  
>  struct amdgpu_mgpu_info mgpu_info = {
>   .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
> legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>  
> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
> greater than 8 or less than 0, only affect gfx 8+)");
> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
> +
>  static const struct pci_device_id pciidlist[] = {
>  #ifdef  CONFIG_DRM_AMDGPU_SI
>   {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8eff017..0b59049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
> amdgpu_device *adev,
>  
>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>  {
> - int i, queue, pipe, mec;
> + int i, queue, pipe, mec, j = 0;
>   bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>  
>   /* policy for amdgpu compute queue ownership */
> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
> amdgpu_device *adev)
>  
>   if (multipipe_policy) {
>   /* policy: amdgpu owns the first two queues of the 
> first MEC */
> - if (mec == 0 && queue < 2)
> - set_bit(i, adev->gfx.mec.queue_bitmap);
> + if (mec == 0 && queue < 2) {
> + if (j++ < adev->gfx.num_compute_rings)

This is not ideal, because it wouldn't distribute the queues evenly
across pipes if there are fewer than 7. I would change how queue and
pipe are calculated from i for the multipipe_policy case:

if (multipipe_policy) {
pipe = i % adev->gfx.mec.num_pipe_per_mec;
queue = (i / adev->gfx.mec.num_pipe_per_mec)
% adev->gfx.mec.num_queue_per_pipe;
} else {
/* previous way */
}


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Daniel Vetter
On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk  wrote:
>
> On Monday, July 27, 2020 4:29 PM, Daniel Vetter  wrote:
>
> > On Mon, Jul 27, 2020 at 9:28 PM Christian König
> >  wrote:
> > >
> > > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > > >>> This patch fixes a race condition that causes a use-after-free during
> > > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > > >>> commits
> > > >>> are requested and the second one finishes before the first.
> > > >>> Essentially,
> > > >>> this bug occurs when the following sequence of events happens:
> > > >>>
> > > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > > >>> deferred to the workqueue.
> > > >>>
> > > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > > >>> deferred to the workqueue.
> > > >>>
> > > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > > >>>
> > > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > > >>> 1 and dereferences a freelist pointer while setting the context.
> > > >>
> > > >> Well I only have a one mile high view on this, but why don't you let
> > > >> the work items execute in order?
> > > >>
> > > >> That would be better anyway cause this way we don't trigger a cache
> > > >> line ping pong between CPUs.
> > > >>
> > > >> Christian.
> > > >
> > > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > > helpers internally push non-blocking commit work into the system
> > > > unbound work queue.
> > >
> > > Mhm, well if you send those helper atomic commits in the order A,B and
> > > they execute it in the order B,A I would call that a bug :)
> >
> > The way it works is it pushes all commits into unbound work queue, but
> > then forces serialization as needed. We do _not_ want e.g. updates on
> > different CRTC to be serialized, that would result in lots of judder.
> > And hw is funny enough that there's all kinds of dependencies.
> >
> > The way you force synchronization is by adding other CRTC state
> > objects. So if DC is busted and can only handle a single update per
> > work item, then I guess you always need all CRTC states and everything
> > will be run in order. But that also totally kills modern multi-screen
> > compositors. Xorg isn't modern, just in case that's not clear :-)
> >
> > Looking at the code it seems like you indeed have only a single dm
> > state, so yeah global sync is what you'll need as immediate fix, and
> > then maybe fix up DM to not be quite so silly ... or at least only do
> > the dm state stuff when really needed.
> >
> > We could also sprinkle the drm_crtc_commit structure around a bit
> > (it's the glue that provides the synchronization across commits), but
> > since your dm state is global just grabbing all crtc states
> > unconditionally as part of that is probably best.
> >
> > > > While we could duplicate a copy of that code with nothing but the
> > > > workqueue changed that isn't something I'd really like to maintain
> > > > going forward.
> > >
> > > I'm not talking about duplicating the code, I'm talking about fixing the
> > > helpers. I don't know that code well, but from the outside it sounds
> > > like a bug there.
> > >
> > > And executing work items in the order they are submitted is trivial.
> > >
> > > Had anybody pinged Daniel or other people familiar with the helper code
> > > about it?
> >
> > Yeah something is wrong here, and the fix looks horrible :-)
> >
> > Aside, I've also seen some recent discussion flare up about
> > drm_atomic_state_get/put used to paper over some other use-after-free,
> > but this time related to interrupt handlers. Maybe a few rules about
> > that:
> > - dont
> > - especially not when it's interrupt handlers, because you can't call
> > drm_atomic_state_put from interrupt handlers.
> >
> > Instead have a spin_lock_irq to protect the shared data with your
> > interrupt handler, and _copy_ the data over. This is e.g. what
> > drm_crtc_arm_vblank_event does.
>
> Nicholas wrote a patch that attempted to resolve the issue by adding every
> CRTC into the commit to use the stall checks. [1] While this forces
> synchronisation on commits, it's kind of a hacky method that may take a
> toll on performance.
>
> Is it possible to have a DRM helper that forces synchronisation on some
> commits without having to add every CRTC into the commit?
>
> Also, is synchronisation really necessary for fast updates in amdgpu?
> I'll admit, the idea of eliminating the use-after-free bug by eliminating
> the use entirely doesn't seem ideal; but is forcing synchronisation on
> these updates that much better?

Well clearing the dc_state pointer here and then allocating another
one in atomic_commit_tail also looks fishy. The proper fix is probably
a lot 

[PATCH] drm/amdgpu/dc: Stop dma_resv_lock inversion in commit_tail

2020-07-27 Thread Daniel Vetter
Trying to grab dma_resv_lock while in commit_tail before we've done
all the code that leads to the eventual signalling of the vblank event
(which can be a dma_fence) is deadlock-y. Don't do that.

Here the solution is easy because just grabbing locks to read
something races anyway. We don't need to bother, READ_ONCE is
equivalent. And avoids the locking issue.

v2: Also take into account tmz_surface boolean, plus just delete the
old code.

Cc: linux-me...@vger.kernel.org
Cc: linaro-mm-...@lists.linaro.org
Cc: linux-r...@vger.kernel.org
Cc: amd-gfx@lists.freedesktop.org
Cc: intel-...@lists.freedesktop.org
Cc: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: Christian König 
Signed-off-by: Daniel Vetter 
---
DC-folks, I think this split out patch from my series here

https://lore.kernel.org/dri-devel/20200707201229.472834-1-daniel.vet...@ffwll.ch/

should be ready for review/merging. I fixed it up a bit so that it's not
just a gross hack :-)

Cheers, Daniel


---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 21ec64fe5527..a20b62b1f2ef 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6959,20 +6959,13 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
DRM_ERROR("Waiting for fences timed out!");
 
/*
-* TODO This might fail and hence better not used, wait
-* explicitly on fences instead
-* and in general should be called for
-* blocking commit to as per framework helpers
+* We cannot reserve buffers here, which means the normal flag
+* access functions don't work. Paper over this with READ_ONCE,
+* but maybe the flags are invariant enough that not even that
+* would be needed.
 */
-   r = amdgpu_bo_reserve(abo, true);
-   if (unlikely(r != 0))
-   DRM_ERROR("failed to reserve buffer before flip\n");
-
> -   amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
-
-   tmz_surface = amdgpu_bo_encrypted(abo);
-
-   amdgpu_bo_unreserve(abo);
+   tiling_flags = READ_ONCE(abo->tiling_flags);
+   tmz_surface = READ_ONCE(abo->flags) & 
AMDGPU_GEM_CREATE_ENCRYPTED;
 
fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
-- 
2.27.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 11/15] drm/amd/display: dchubbub p-state warning during surface planes switch

2020-07-27 Thread Sasha Levin
Hi

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has tested the following trees: v5.7.10, v5.4.53, v4.19.134, v4.14.189, 
v4.9.231, v4.4.231.

v5.7.10: Build OK!
v5.4.53: Build OK!
v4.19.134: Failed to apply! Possible dependencies:
1e7e86c43f38 ("drm/amd/display: decouple front and backend pgm using 
dpms_off as backend enable flag")
24f7dd7ea98d ("drm/amd/display: move pplib/smu notification to dccg block")
4c5e8b541527 ("drm/amd/display: split dccg clock manager into asic folders")
8c3db1284a01 ("drm/amdgpu: fill in 
amdgpu_dm_remove_sink_from_freesync_module")
98e6436d3af5 ("drm/amd/display: Refactor FreeSync module")
ad908423ef86 ("drm/amd/display: support 48 MHZ refclk off")
c85e6e546edd ("drm/amd/display: Create new i2c resource")

v4.14.189: Failed to apply! Possible dependencies:
1b0c0f9dc5ca ("drm/amdgpu: move userptr BOs to CPU domain during CS v2")
1ed3d2567c80 ("drm/amdgpu: keep the MMU lock until the update ends v4")
3fe89771cb0a ("drm/amdgpu: stop reserving the BO in the MMU callback v3")
4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
4c5e8b541527 ("drm/amd/display: split dccg clock manager into asic folders")
60de1c1740f3 ("drm/amdgpu: use a rw_semaphore for MMU notifiers")
9a18999640fa ("drm/amdgpu: move MMU notifier related defines to 
amdgpu_mn.h")
9cca0b8e5df0 ("drm/amdgpu: move amdgpu_cs_sysvm_access_required into 
find_mapping")
a216ab09955d ("drm/amdgpu: fix userptr put_page handling")
b72cf4fca2bb ("drm/amdgpu: move taking mmap_sem into get_user_pages v2")
ca666a3c298f ("drm/amdgpu: stop using BO status for user pages")

v4.9.231: Failed to apply! Possible dependencies:
1cec20f0ea0e ("dma-buf: Restart reservation_object_wait_timeout_rcu() after 
writes")
248a1d6f1ac4 ("drm/amd: fix include notation and remove -Iinclude/drm flag")
4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
4c5e8b541527 ("drm/amd/display: split dccg clock manager into asic folders")
78010cd9736e ("dma-buf/fence: add an lockdep_assert_held()")
f54d1867005c ("dma-buf: Rename struct fence to dma_fence")
fedf54132d24 ("dma-buf: Restart reservation_object_get_fences_rcu() after 
writes")

v4.4.231: Failed to apply! Possible dependencies:
0f477c6dea70 ("staging/android/sync: add sync_fence_create_dma")
1f7371b2a5fa ("drm/amd/powerplay: add basic powerplay framework")
248a1d6f1ac4 ("drm/amd: fix include notation and remove -Iinclude/drm flag")
288912cb95d1 ("drm/amdgpu: use $(src) in Makefile (v2)")
375fb53ec1be ("staging: android: replace explicit NULL comparison")
395dec6f6bc5 ("Documentation: add doc for sync_file_get_fence()")
4325198180e5 ("drm/amdgpu: remove GART page addr array")
4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
4c5e8b541527 ("drm/amd/display: split dccg clock manager into asic folders")
62304fb1fc08 ("dma-buf/sync_file: de-stage sync_file")
a1d29476d666 ("drm/amdgpu: optionally enable GART debugfs file")
a8fe58cec351 ("drm/amd: add ACP driver support")
b70f014d58b9 ("drm/amdgpu: change default sched jobs to 32")
c784c82a3fd6 ("Documentation: add Sync File doc")
d4cab38e153d ("staging/android: prepare sync_file for de-staging")
d7fdb0ae9d11 ("staging/android: rename sync_fence to sync_file")
f54d1867005c ("dma-buf: Rename struct fence to dma_fence")
fac8434dab96 ("Documentation: Fix some grammar mistakes in sync_file.txt")
fdba11f4079e ("drm/amdgpu: move all Kconfig options to amdgpu/Kconfig")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Daniel Vetter
On Mon, Jul 27, 2020 at 10:29 PM Daniel Vetter  wrote:
>
> On Mon, Jul 27, 2020 at 9:28 PM Christian König
>  wrote:
> >
> > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > >>> This patch fixes a race condition that causes a use-after-free during
> > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > >>> commits
> > >>> are requested and the second one finishes before the first.
> > >>> Essentially,
> > >>> this bug occurs when the following sequence of events happens:
> > >>>
> > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > >>>
> > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > >>> 1 and dereferences a freelist pointer while setting the context.
> > >>
> > >> Well I only have a one mile high view on this, but why don't you let
> > >> the work items execute in order?
> > >>
> > >> That would be better anyway cause this way we don't trigger a cache
> > >> line ping pong between CPUs.
> > >>
> > >> Christian.
> > >
> > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > helpers internally push non-blocking commit work into the system
> > > unbound work queue.
> >
> > Mhm, well if you send those helper atomic commits in the order A,B and
> > they execute it in the order B,A I would call that a bug :)
>
> The way it works is it pushes all commits into unbound work queue, but
> then forces serialization as needed. We do _not_ want e.g. updates on
> different CRTC to be serialized, that would result in lots of judder.
> And hw is funny enough that there's all kinds of dependencies.
>
> The way you force synchronization is by adding other CRTC state
> objects. So if DC is busted and can only handle a single update per
> work item, then I guess you always need all CRTC states and everything
> will be run in order. But that also totally kills modern multi-screen
> compositors. Xorg isn't modern, just in case that's not clear :-)
>
> Looking at the code it seems like you indeed have only a single dm
> state, so yeah global sync is what you'll need as immediate fix, and
> then maybe fix up DM to not be quite so silly ... or at least only do
> the dm state stuff when really needed.

Just looked a bit more at this struct dc_state, and that looks a lot
like an atomic side-wagon. I don't think that works as a private
state, this should probably be embedded into a subclass of
drm_atomic_state.

And probably a lot of these pointers moved to other places I think, or
I'm not entirely clear on what exactly this stuff is needed for ...

dc_state is also refcounted, which is definitely rather funny for a
state structure.

Feels like this entire thing (how the overall dc state machinery is
glued into atomic) isn't quite thought thru just yet :-/
-Daniel

> We could also sprinkle the drm_crtc_commit structure around a bit
> (it's the glue that provides the synchronization across commits), but
> since your dm state is global just grabbing all crtc states
> unconditionally as part of that is probably best.
>
> > > While we could duplicate a copy of that code with nothing but the
> > > workqueue changed that isn't something I'd really like to maintain
> > > going forward.
> >
> > I'm not talking about duplicating the code, I'm talking about fixing the
> > helpers. I don't know that code well, but from the outside it sounds
> > like a bug there.
> >
> > And executing work items in the order they are submitted is trivial.
> >
> > Had anybody pinged Daniel or other people familiar with the helper code
> > about it?
>
> Yeah something is wrong here, and the fix looks horrible :-)
>
> Aside, I've also seen some recent discussion flare up about
> drm_atomic_state_get/put used to paper over some other use-after-free,
> but this time related to interrupt handlers. Maybe a few rules about
> that:
> - dont
> - especially not when it's interrupt handlers, because you can't call
> drm_atomic_state_put from interrupt handlers.
>
> Instead have a spin_lock_irq to protect the shared data with your
> interrupt handler, and _copy_ the data over. This is e.g. what
> drm_crtc_arm_vblank_event does.
>
> Cheers, Daniel
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Regards,
> > > Nicholas Kazlauskas
> > >
> > >>
> > >>>
> > >>> Since this bug has only been spotted with fast commits, this patch
> > >>> fixes
> > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is

Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Daniel Vetter
On Mon, Jul 27, 2020 at 9:28 PM Christian König
 wrote:
>
> Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > On 2020-07-27 9:39 a.m., Christian König wrote:
> >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> >>> This patch fixes a race condition that causes a use-after-free during
> >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> >>> commits
> >>> are requested and the second one finishes before the first.
> >>> Essentially,
> >>> this bug occurs when the following sequence of events happens:
> >>>
> >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> >>> deferred to the workqueue.
> >>>
> >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> >>> deferred to the workqueue.
> >>>
> >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> >>> commit_tail and commit #2 completes, freeing dm_state #1.
> >>>
> >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> >>> 1 and dereferences a freelist pointer while setting the context.
> >>
> >> Well I only have a one mile high view on this, but why don't you let
> >> the work items execute in order?
> >>
> >> That would be better anyway cause this way we don't trigger a cache
> >> line ping pong between CPUs.
> >>
> >> Christian.
> >
> > We use the DRM helpers for managing drm_atomic_commit_state and those
> > helpers internally push non-blocking commit work into the system
> > unbound work queue.
>
> Mhm, well if you send those helper atomic commits in the order A,B and
> they execute it in the order B,A I would call that a bug :)

The way it works is it pushes all commits into unbound work queue, but
then forces serialization as needed. We do _not_ want e.g. updates on
different CRTC to be serialized, that would result in lots of judder.
And hw is funny enough that there's all kinds of dependencies.

The way you force synchronization is by adding other CRTC state
objects. So if DC is busted and can only handle a single update per
work item, then I guess you always need all CRTC states and everything
will be run in order. But that also totally kills modern multi-screen
compositors. Xorg isn't modern, just in case that's not clear :-)

Looking at the code it seems like you indeed have only a single dm
state, so yeah global sync is what you'll need as immediate fix, and
then maybe fix up DM to not be quite so silly ... or at least only do
the dm state stuff when really needed.

We could also sprinkle the drm_crtc_commit structure around a bit
(it's the glue that provides the synchronization across commits), but
since your dm state is global just grabbing all crtc states
unconditionally as part of that is probably best.

> > While we could duplicate a copy of that code with nothing but the
> > workqueue changed that isn't something I'd really like to maintain
> > going forward.
>
> I'm not talking about duplicating the code, I'm talking about fixing the
> helpers. I don't know that code well, but from the outside it sounds
> like a bug there.
>
> And executing work items in the order they are submitted is trivial.
>
> Had anybody pinged Daniel or other people familiar with the helper code
> about it?

Yeah something is wrong here, and the fix looks horrible :-)

Aside, I've also seen some recent discussion flare up about
drm_atomic_state_get/put used to paper over some other use-after-free,
but this time related to interrupt handlers. Maybe a few rules about
that:
- dont
- especially not when it's interrupt handlers, because you can't call
drm_atomic_state_put from interrupt handlers.

Instead have a spin_lock_irq to protect the shared data with your
interrupt handler, and _copy_ the data over. This is e.g. what
drm_crtc_arm_vblank_event does.

Cheers, Daniel

>
> Regards,
> Christian.
>
> >
> > Regards,
> > Nicholas Kazlauskas
> >
> >>
> >>>
> >>> Since this bug has only been spotted with fast commits, this patch
> >>> fixes
> >>> the bug by clearing the dm_state instead of using the old dc_state for
> >>> fast updates. In addition, since dm_state is only used for its dc_state
> >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> >>> found,
> >>> removing the dm_state should not have any consequences in fast updates.
> >>>
> >>> This use-after-free bug has existed for a while now, but only caused a
> >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> >>> relocate
> >>> freelist pointer to middle of object") moving the freelist pointer from
> >>> dm_state->base (which was unused) to dm_state->context (which is
> >>> dereferenced).
> >>>
> >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> >>> for fast updates")
> >>> Reported-by: Duncan <1i5t5.dun...@cox.net>
> >>> Signed-off-by: Mazin Rezk 
> >>> ---
> >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> >>> ++-
> >>>   1 file changed, 27 insertions(+), 9 

Re:

2020-07-27 Thread Alex Deucher
On Mon, Jul 27, 2020 at 3:46 PM Mauro Rossi  wrote:
>
>
>
> On Mon, Jul 27, 2020 at 8:31 PM Alex Deucher  wrote:
>>
>> On Sun, Jul 26, 2020 at 11:31 AM Mauro Rossi  wrote:
>> >
>> > Hello,
>> >
>> > On Fri, Jul 24, 2020 at 8:31 PM Alex Deucher  wrote:
>> >>
>> >> On Wed, Jul 22, 2020 at 3:57 AM Mauro Rossi  wrote:
>> >> >
>> >> > Hello,
>> >> > re-sending and copying full DL
>> >> >
>> >> > On Wed, Jul 22, 2020 at 4:51 AM Alex Deucher  
>> >> > wrote:
>> >> >>
>> >> >> On Mon, Jul 20, 2020 at 6:00 AM Mauro Rossi  
>> >> >> wrote:
>> >> >> >
>> >> >> > Hi Christian,
>> >> >> >
>> >> >> > On Mon, Jul 20, 2020 at 11:00 AM Christian König
>> >> >> >  wrote:
>> >> >> > >
>> >> >> > > Hi Mauro,
>> >> >> > >
>> >> >> > > I'm not deep into the whole DC design, so just some general high 
>> >> >> > > level
>> >> >> > > comments on the cover letter:
>> >> >> > >
>> >> >> > > 1. Please add a subject line to the cover letter, my spam filter 
>> >> >> > > thinks
>> >> >> > > that this is suspicious otherwise.
>> >> >> >
>> >> >> > My mistake in the editing of covert letter with git send-email,
>> >> >> > I may have forgot to keep the Subject at the top
>> >> >> >
>> >> >> > >
>> >> >> > > 2. Then you should probably note how well (badly?) is that tested. 
>> >> >> > > Since
>> >> >> > > you noted proof of concept it might not even work.
>> >> >> >
>> >> >> > The Changelog is to be read as:
>> >> >> >
>> >> >> > [RFC] was the initial Proof of concept was the RFC and [PATCH v2] was
>> >> >> > just a rebase onto amd-staging-drm-next
>> >> >> >
>> >> >> > this series [PATCH v3] has all the known changes required for DCE6 
>> >> >> > specificity
> >> >> > and based on a long offline thread with Alexander Deucher and past
>> >> >> > dri-devel chats with Harry Wentland.
>> >> >> >
>> >> >> > It was tested for my possibilities of testing with HD7750 and HD7950,
>> >> >> > with checks in dmesg output for not getting "missing registers/masks"
>> >> >> > kernel WARNING
>> >> >> > and with kernel build on Ubuntu 20.04 and with android-x86
>> >> >> >
>> >> >> > The proposal I made to Alex is that AMD testing systems will be used
>> >> >> > for further regression testing,
>> >> >> > as part of review and validation for eligibility to 
>> >> >> > amd-staging-drm-next
>> >> >> >
>> >> >>
>> >> >> We will certainly test it once it lands, but presumably this is
>> >> >> working on the SI cards you have access to?
>> >> >
>> >> >
>> >> > Yes, most of my testing was done with android-x86  Android CTS (EGL, 
>> >> > GLES2, GLES3, VK)
>> >> >
>> >> > I am also in contact with a person with Firepro W5130M who is running a 
>> >> > piglit session
>> >> >
>> >> > I had bought an HD7850 to test with Pitcairn, but it arrived as 
> >> >> > defective so I could not test with Pitcairn
>> >> >
>> >> >
>> >> >>
>> >> >> > >
>> >> >> > > 3. How feature complete (HDMI audio?, Freesync?) is it?
>> >> >> >
>> >> >> > All the changes in DC impacting DCE8 (dc/dce80 path) were ported to
>> >> >> > DCE6 (dc/dce60 path) in the last two years from initial submission
>> >> >> >
>> >> >> > >
>> >> >> > > Apart from that it looks like a rather impressive piece of work :)
>> >> >> > >
>> >> >> > > Cheers,
>> >> >> > > Christian.
>> >> >> >
>> >> >> > Thanks,
>> >> >> > please consider that most of the latest DCE6 specific parts were
>> >> >> > possible due to recent Alex support in getting the correct DCE6
>> >> >> > headers,
>> >> >> > his suggestions and continuous feedback.
>> >> >> >
>> >> >> > I would suggest that Alex comments on the proposed next steps to 
>> >> >> > follow.
>> >> >>
>> >> >> The code looks pretty good to me.  I'd like to get some feedback from
>> >> >> the display team to see if they have any concerns, but beyond that I
>> >> >> think we can pull it into the tree and continue improving it there.
>> >> >> Do you have a link to a git tree I can pull directly that contains
>> >> >> these patches?  Is this the right branch?
>> >> >> https://github.com/maurossi/linux/commits/kernel-5.8rc4_si_next
>> >> >>
>> >> >> Thanks!
>> >> >>
>> >> >> Alex
>> >> >
>> >> >
>> >> > The following branch was pushed with the series on top of 
>> >> > amd-staging-drm-next
>> >> >
>> >> > https://github.com/maurossi/linux/commits/kernel-5.6_si_drm-next
>> >>
>> >> I gave this a quick test on all of the SI asics and the various
>> >> monitors I had available and it looks good.  A few minor patches I
>> >> noticed are attached.  If they look good to you, I'll squash them into
>> >> the series when I commit it.  I've pushed it to my fdo tree as well:
>> >> https://cgit.freedesktop.org/~agd5f/linux/log/?h=si_dc_support
>> >>
>> >> Thanks!
>> >>
>> >> Alex
>> >
>> >
> > The new patches are ok and with the following information about piglit 
> > tests,
>> > the series may be good to go.
>> >
>> > I have performed piglit tests on Tahiti HD7950 on kernel 5.8.0-rc6 with 
>> > AMD DC support for SI
>> > and comparison with vanilla kernel 5.8.0-rc6
>> >
>> > Results are 

Re:

2020-07-27 Thread Mauro Rossi
On Mon, Jul 27, 2020 at 8:31 PM Alex Deucher  wrote:

> On Sun, Jul 26, 2020 at 11:31 AM Mauro Rossi 
> wrote:
> >
> > Hello,
> >
> > On Fri, Jul 24, 2020 at 8:31 PM Alex Deucher 
> wrote:
> >>
> >> On Wed, Jul 22, 2020 at 3:57 AM Mauro Rossi 
> wrote:
> >> >
> >> > Hello,
> >> > re-sending and copying full DL
> >> >
> >> > On Wed, Jul 22, 2020 at 4:51 AM Alex Deucher 
> wrote:
> >> >>
> >> >> On Mon, Jul 20, 2020 at 6:00 AM Mauro Rossi 
> wrote:
> >> >> >
> >> >> > Hi Christian,
> >> >> >
> >> >> > On Mon, Jul 20, 2020 at 11:00 AM Christian König
> >> >> >  wrote:
> >> >> > >
> >> >> > > Hi Mauro,
> >> >> > >
> >> >> > > I'm not deep into the whole DC design, so just some general high
> level
> >> >> > > comments on the cover letter:
> >> >> > >
> >> >> > > 1. Please add a subject line to the cover letter, my spam filter
> thinks
> >> >> > > that this is suspicious otherwise.
> >> >> >
> >> >> > My mistake in the editing of covert letter with git send-email,
> >> >> > I may have forgot to keep the Subject at the top
> >> >> >
> >> >> > >
> >> >> > > 2. Then you should probably note how well (badly?) is that
> tested. Since
> >> >> > > you noted proof of concept it might not even work.
> >> >> >
> >> >> > The Changelog is to be read as:
> >> >> >
> >> >> > [RFC] was the initial Proof of concept was the RFC and [PATCH v2]
> was
> >> >> > just a rebase onto amd-staging-drm-next
> >> >> >
> >> >> > this series [PATCH v3] has all the known changes required for DCE6
> specificity
> >> >> > and based on a long offline thread with Alexander Deucher and past
> >> >> > dri-devel chats with Harry Wentland.
> >> >> >
> >> >> > It was tested for my possibilities of testing with HD7750 and
> HD7950,
> >> >> > with checks in dmesg output for not getting "missing
> registers/masks"
> >> >> > kernel WARNING
> >> >> > and with kernel build on Ubuntu 20.04 and with android-x86
> >> >> >
> >> >> > The proposal I made to Alex is that AMD testing systems will be
> used
> >> >> > for further regression testing,
> >> >> > as part of review and validation for eligibility to
> amd-staging-drm-next
> >> >> >
> >> >>
> >> >> We will certainly test it once it lands, but presumably this is
> >> >> working on the SI cards you have access to?
> >> >
> >> >
> >> > Yes, most of my testing was done with android-x86  Android CTS (EGL,
> GLES2, GLES3, VK)
> >> >
> >> > I am also in contact with a person with Firepro W5130M who is running
> a piglit session
> >> >
> >> > I had bought an HD7850 to test with Pitcairn, but it arrived as
> defective so I could not test with Pitcairn
> >> >
> >> >
> >> >>
> >> >> > >
> >> >> > > 3. How feature complete (HDMI audio?, Freesync?) is it?
> >> >> >
> >> >> > All the changes in DC impacting DCE8 (dc/dce80 path) were ported to
> >> >> > DCE6 (dc/dce60 path) in the last two years from initial submission
> >> >> >
> >> >> > >
> >> >> > > Apart from that it looks like a rather impressive piece of work
> :)
> >> >> > >
> >> >> > > Cheers,
> >> >> > > Christian.
> >> >> >
> >> >> > Thanks,
> >> >> > please consider that most of the latest DCE6 specific parts were
> >> >> > possible due to recent Alex support in getting the correct DCE6
> >> >> > headers,
> >> >> > his suggestions and continuous feedback.
> >> >> >
> >> >> > I would suggest that Alex comments on the proposed next steps to
> follow.
> >> >>
> >> >> The code looks pretty good to me.  I'd like to get some feedback from
> >> >> the display team to see if they have any concerns, but beyond that I
> >> >> think we can pull it into the tree and continue improving it there.
> >> >> Do you have a link to a git tree I can pull directly that contains
> >> >> these patches?  Is this the right branch?
> >> >> https://github.com/maurossi/linux/commits/kernel-5.8rc4_si_next
> >> >>
> >> >> Thanks!
> >> >>
> >> >> Alex
> >> >
> >> >
> >> > The following branch was pushed with the series on top of
> amd-staging-drm-next
> >> >
> >> > https://github.com/maurossi/linux/commits/kernel-5.6_si_drm-next
> >>
> >> I gave this a quick test on all of the SI asics and the various
> >> monitors I had available and it looks good.  A few minor patches I
> >> noticed are attached.  If they look good to you, I'll squash them into
> >> the series when I commit it.  I've pushed it to my fdo tree as well:
> >> https://cgit.freedesktop.org/~agd5f/linux/log/?h=si_dc_support
> >>
> >> Thanks!
> >>
> >> Alex
> >
> >
> > The new patches are ok and with the following information about piglit
> tests,
> > the series may be good to go.
> >
> > I have performed piglit tests on Tahiti HD7950 on kernel 5.8.0-rc6 with
> AMD DC support for SI
> > and comparison with vanilla kernel 5.8.0-rc6
> >
> > Results are the following
> >
> > [piglit gpu tests with kernel 5.8.0-rc6-amddcsi]
> >
> > utente@utente-desktop:~/piglit$ ./piglit run gpu .
> > [26714/26714] skip: 1731, pass: 24669, warn: 15, fail: 288, crash: 11
> > Thank you for running Piglit!
> > Results have been written to 

Re: [PATCH] drm/amdgpu/si: initial support for GPU reset

2020-07-27 Thread Christian König

Am 27.07.20 um 19:34 schrieb Alex Deucher:

Ported from radeon.

Signed-off-by: Alex Deucher 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/si.c | 92 -
  1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 1b449291f068..a7a45f06c8f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1215,10 +1215,98 @@ static bool si_read_bios_from_rom(struct amdgpu_device 
*adev,
return true;
  }
  
-//xxx: not implemented

+static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
+{
+   u32 tmp, i;
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_BYPASS_EN;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+   tmp |= SPLL_CTLREQ_CHG;
+   WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+   break;
+   udelay(1);
+   }
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+   tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
+   WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+   tmp = RREG32(MPLL_CNTL_MODE);
+   tmp &= ~MPLL_MCLK_SEL;
+   WREG32(MPLL_CNTL_MODE, tmp);
+}
+
+static void si_spll_powerdown(struct amdgpu_device *adev)
+{
+   u32 tmp;
+
+   tmp = RREG32(SPLL_CNTL_MODE);
+   tmp |= SPLL_SW_DIR_CONTROL;
+   WREG32(SPLL_CNTL_MODE, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_RESET;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_SLEEP;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(SPLL_CNTL_MODE);
+   tmp &= ~SPLL_SW_DIR_CONTROL;
+   WREG32(SPLL_CNTL_MODE, tmp);
+}
+
+static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
+{
+   u32 i;
+   int r = -EINVAL;
+
+   dev_info(adev->dev, "GPU pci config reset\n");
+
+   /* set mclk/sclk to bypass */
+   si_set_clk_bypass_mode(adev);
+   /* powerdown spll */
+   si_spll_powerdown(adev);
+   /* disable BM */
+   pci_clear_master(adev->pdev);
+   /* reset */
+   amdgpu_device_pci_config_reset(adev);
+
+   udelay(100);
+
+   /* wait for asic to come out of reset */
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (RREG32(mmCONFIG_MEMSIZE) != 0x) {
+   /* enable BM */
+   pci_set_master(adev->pdev);
+   adev->has_hw_reset = true;
+   r = 0;
+   break;
+   }
+   udelay(1);
+   }
+
+   return r;
+}
+
  static int si_asic_reset(struct amdgpu_device *adev)
  {
-   return 0;
+   int r;
+
+   amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+   r = si_gpu_pci_config_reset(adev);
+
+   amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+   return r;
  }
  
  static bool si_asic_supports_baco(struct amdgpu_device *adev)


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want

2020-07-27 Thread Christian König

Am 27.07.20 um 18:02 schrieb Felix Kuehling:

Am 2020-07-27 um 5:26 a.m. schrieb Christian König:

Am 27.07.20 um 10:21 schrieb Monk Liu:

what:
KCQ cost many clocks during world switch which impacts a lot to multi-VF
performance

how:
introduce a parameter to control the number of KCQ to avoid performance
drop if there is no KCQ needed

notes:
this parameter only affects gfx 8/9/10

Sounds like a good idea to me, but that needs a different name.
Outside AMD most people don't know what a KCQ is.

Just use compute queue or similar as name for this.

Just "compute queue" would be confusing for ROCm users. Maybe "legacy
compute queues"?


"kernel compute queues" is just fine, we just shouldn't shorten it.

And we should especially drop the "_user_set" postfix.

Regards,
Christian.



Regards,
   Felix



Another comment below.


Signed-off-by: Monk Liu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27
+-
   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30
+++--
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29
++--
   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31
+++---
   7 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
   #ifdef CONFIG_DRM_AMDGPU_CIK
   extern int amdgpu_cik_support;
   #endif
+extern int amdgpu_num_kcq_user_set;
     #define AMDGPU_VM_MAX_NUM_CTX    4096
   #define AMDGPU_SG_THRESHOLD    (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct
amdgpu_device *adev)
     amdgpu_gmc_tmz_set(adev);
   +    if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
+    amdgpu_num_kcq_user_set = 8;

This needs a warning or error message if we overwrite invalid user
provided parameters.

Christian.


+
   return 0;
   }
   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
   int amdgpu_force_asic_type = -1;
   int amdgpu_tmz = 0;
   int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
     struct amdgpu_mgpu_info mgpu_info = {
   .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
   MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto
(default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
   module_param_named(reset_method, amdgpu_reset_method, int, 0444);
   +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if
set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
   static const struct pci_device_id pciidlist[] = {
   #ifdef  CONFIG_DRM_AMDGPU_SI
   {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool
amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
     void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
   {
-    int i, queue, pipe, mec;
+    int i, queue, pipe, mec, j = 0;
   bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
     /* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct
amdgpu_device *adev)
     if (multipipe_policy) {
   /* policy: amdgpu owns the first two queues of the
first MEC */
-    if (mec == 0 && queue < 2)
-    set_bit(i, adev->gfx.mec.queue_bitmap);
+    if (mec == 0 && queue < 2) {
+    if (j++ < adev->gfx.num_compute_rings)
+    set_bit(i, adev->gfx.mec.queue_bitmap);
+    else
+    break;
+    }
   } else {
   /* policy: amdgpu owns all queues in the first pipe */
-    if (mec == 0 && pipe == 0)
-    set_bit(i, adev->gfx.mec.queue_bitmap);
+    if (mec == 0 && pipe == 0) {
+    if (j++ < adev->gfx.num_compute_rings)
+    

Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Christian König

Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:

On 2020-07-27 9:39 a.m., Christian König wrote:

Am 27.07.20 um 07:40 schrieb Mazin Rezk:

This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking 
commits
are requested and the second one finishes before the first. 
Essentially,

this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.


Well I only have a one mile high view on this, but why don't you let 
the work items execute in order?


That would be better anyway cause this way we don't trigger a cache 
line ping pong between CPUs.


Christian.


We use the DRM helpers for managing drm_atomic_commit_state and those 
helpers internally push non-blocking commit work into the system 
unbound work queue.


Mhm, well if you send those helper atomic commits in the order A,B and 
they execute it in the order B,A I would call that a bug :)


While we could duplicate a copy of that code with nothing but the 
workqueue changed that isn't something I'd really like to maintain 
going forward.


I'm not talking about duplicating the code, I'm talking about fixing the 
helpers. I don't know that code well, but from the outside it sounds 
like a bug there.


And executing work items in the order they are submitted is trivial.

Had anybody pinged Daniel or other people familiar with the helper code 
about it?


Regards,
Christian.



Regards,
Nicholas Kazlauskas





Since this bug has only been spotted with fast commits, this patch 
fixes

the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is 
found,

removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: 
relocate

freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state 
for fast updates")

Reported-by: Duncan <1i5t5.dun...@cox.net>
Signed-off-by: Mazin Rezk 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 
++-

  1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
drm_device *dev,

   * the same resource. If we have a new DC context as part of
   * the DM atomic state from validation we need to free it and
   * retain the existing one instead.
+ *
+ * Furthermore, since the DM atomic state only contains the DC
+ * context and can safely be annulled, we can free the state
+ * and clear the associated private object now to free
+ * some memory and avoid a possible use-after-free later.
   */
-    struct dm_atomic_state *new_dm_state, *old_dm_state;

-    new_dm_state = dm_atomic_get_new_state(state);
-    old_dm_state = dm_atomic_get_old_state(state);
+    for (i = 0; i < state->num_private_objs; i++) {
+    struct drm_private_obj *obj = state->private_objs[i].ptr;

-    if (new_dm_state && old_dm_state) {
-    if (new_dm_state->context)
-    dc_release_state(new_dm_state->context);
+    if (obj->funcs == adev->dm.atomic_obj.funcs) {
+    int j = state->num_private_objs-1;

-    new_dm_state->context = old_dm_state->context;
+    dm_atomic_destroy_state(obj,
+    state->private_objs[i].state);
+
+    /* If i is not at the end of the array then the
+ * last element needs to be moved to where i was
+ * before the array can safely be truncated.
+ */
+    if (i != j)
+    state->private_objs[i] =
+    state->private_objs[j];

-    if (old_dm_state->context)
-    dc_retain_state(old_dm_state->context);
+    state->private_objs[j].ptr = NULL;
+  

Re: [PATCH] drm/amdgpu: fix PSP autoload twice in FLR

2020-07-27 Thread Luben Tuikov
On 2020-07-27 7:03 a.m., Liu ChengZhe wrote:
> the block->status.hw = false assignment will overwrite PSP's previous
> hw status, which will cause PSP execute resume operation after hw init.
> 
> Signed-off-by: Liu ChengZhe 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 62ecac97fbd2..88c681957d39 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2574,6 +2574,10 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
> amdgpu_device *adev)
>   AMD_IP_BLOCK_TYPE_IH,
>   };
>  
> + for (i = 0; i < adev->num_ip_blocks; i++) {
> + adev->ip_blocks[i].status.hw = false;
> + }
> +

Braces surrounding a single statement block are unnecessary
and "checkpatch" complains about it. Just remove the braces
around a single statement block in loops.

Regards,
Luben

>   for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
>   int j;
>   struct amdgpu_ip_block *block;
> @@ -2581,7 +2585,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
> amdgpu_device *adev)
>   for (j = 0; j < adev->num_ip_blocks; j++) {
>   block = >ip_blocks[j];
>  
> - block->status.hw = false;
>   if (block->version->type != ip_order[i] ||
>   !block->status.valid)
>   continue;
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Luben Tuikov
On 2020-07-27 6:57 a.m., Liu ChengZhe wrote:
> From: root 
> 
> 1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation;
> 2. Check pointer before release firmware.
> 
> Signed-off-by: root 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 40 +
>  1 file changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index a053b7af0680..a9481e112cb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
>   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
>   psp_memory_training_fini(>psp);
> - release_firmware(adev->psp.sos_fw);
> - adev->psp.sos_fw = NULL;
> - release_firmware(adev->psp.asd_fw);
> - adev->psp.asd_fw = NULL;
> - release_firmware(adev->psp.ta_fw);
> - adev->psp.ta_fw = NULL;
> + if (adev->psp.sos_fw) {
> + release_firmware(adev->psp.sos_fw);
> + adev->psp.sos_fw = NULL;
> + }
> + if (adev->psp.asd_fw) {
> + release_firmware(adev->psp.asd_fw);
> + adev->psp.asd_fw = NULL;
> + }
> + if (adev->psp.ta_fw) {
> + release_firmware(adev->psp.ta_fw);
> + adev->psp.ta_fw = NULL;
> + }
>  
>   if (adev->asic_type == CHIP_NAVI10)
>   psp_sysfs_fini(adev);
> @@ -409,11 +415,33 @@ static int psp_clear_vf_fw(struct psp_context *psp)
>   return ret;
>  }
>  
> +static bool psp_skip_tmr(struct psp_context *psp)
> +{
> + bool ret = false;
> +
> + switch (psp->adev->asic_type) {
> + case CHIP_NAVI12:
> + case CHIP_SIENNA_CICHLID:
> + ret = true;
> + break;
> + default:
> + return false;
> + }
> +
> + return ret;
> +}

There's no need for the local "bool ret". Remove it.
See in the "default:" case you already do "return false;".
Do "return true;" in the NAVI12 and SIENNA CICHLID case.

Regards,
Luben


> +
>  static int psp_tmr_load(struct psp_context *psp)
>  {
>   int ret;
>   struct psp_gfx_cmd_resp *cmd;
>  
> + /* for Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not setup TMR
> +  * (already setup by host driver)
> +  */
> + if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
> + return 0;
> +
>   cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
>   if (!cmd)
>   return -ENOMEM;
> 

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re:

2020-07-27 Thread Alex Deucher
On Sun, Jul 26, 2020 at 11:31 AM Mauro Rossi  wrote:
>
> Hello,
>
> On Fri, Jul 24, 2020 at 8:31 PM Alex Deucher  wrote:
>>
>> On Wed, Jul 22, 2020 at 3:57 AM Mauro Rossi  wrote:
>> >
>> > Hello,
>> > re-sending and copying full DL
>> >
>> > On Wed, Jul 22, 2020 at 4:51 AM Alex Deucher  wrote:
>> >>
>> >> On Mon, Jul 20, 2020 at 6:00 AM Mauro Rossi  wrote:
>> >> >
>> >> > Hi Christian,
>> >> >
>> >> > On Mon, Jul 20, 2020 at 11:00 AM Christian König
>> >> >  wrote:
>> >> > >
>> >> > > Hi Mauro,
>> >> > >
>> >> > > I'm not deep into the whole DC design, so just some general high level
>> >> > > comments on the cover letter:
>> >> > >
>> >> > > 1. Please add a subject line to the cover letter, my spam filter 
>> >> > > thinks
>> >> > > that this is suspicious otherwise.
>> >> >
>> >> > My mistake in the editing of covert letter with git send-email,
>> >> > I may have forgot to keep the Subject at the top
>> >> >
>> >> > >
>> >> > > 2. Then you should probably note how well (badly?) is that tested. 
>> >> > > Since
>> >> > > you noted proof of concept it might not even work.
>> >> >
>> >> > The Changelog is to be read as:
>> >> >
>> >> > [RFC] was the initial Proof of concept and [PATCH v2] was
>> >> > just a rebase onto amd-staging-drm-next
>> >> >
>> >> > this series [PATCH v3] has all the known changes required for DCE6 
>> >> > specificity
>> >> > and based on a long offline thread with Alexander Deucher and past
>> >> > dri-devel chats with Harry Wentland.
>> >> >
>> >> > It was tested for my possibilities of testing with HD7750 and HD7950,
>> >> > with checks in dmesg output for not getting "missing registers/masks"
>> >> > kernel WARNING
>> >> > and with kernel build on Ubuntu 20.04 and with android-x86
>> >> >
>> >> > The proposal I made to Alex is that AMD testing systems will be used
>> >> > for further regression testing,
>> >> > as part of review and validation for eligibility to amd-staging-drm-next
>> >> >
>> >>
>> >> We will certainly test it once it lands, but presumably this is
>> >> working on the SI cards you have access to?
>> >
>> >
>> > Yes, most of my testing was done with android-x86  Android CTS (EGL, 
>> > GLES2, GLES3, VK)
>> >
>> > I am also in contact with a person with Firepro W5130M who is running a 
>> > piglit session
>> >
>> > I had bought an HD7850 to test with Pitcairn, but it arrived as defective 
>> > so I could not test with Pitcairn
>> >
>> >
>> >>
>> >> > >
>> >> > > 3. How feature complete (HDMI audio?, Freesync?) is it?
>> >> >
>> >> > All the changes in DC impacting DCE8 (dc/dce80 path) were ported to
>> >> > DCE6 (dc/dce60 path) in the last two years from initial submission
>> >> >
>> >> > >
>> >> > > Apart from that it looks like a rather impressive piece of work :)
>> >> > >
>> >> > > Cheers,
>> >> > > Christian.
>> >> >
>> >> > Thanks,
>> >> > please consider that most of the latest DCE6 specific parts were
>> >> > possible due to recent Alex support in getting the correct DCE6
>> >> > headers,
>> >> > his suggestions and continuous feedback.
>> >> >
>> >> > I would suggest that Alex comments on the proposed next steps to follow.
>> >>
>> >> The code looks pretty good to me.  I'd like to get some feedback from
>> >> the display team to see if they have any concerns, but beyond that I
>> >> think we can pull it into the tree and continue improving it there.
>> >> Do you have a link to a git tree I can pull directly that contains
>> >> these patches?  Is this the right branch?
>> >> https://github.com/maurossi/linux/commits/kernel-5.8rc4_si_next
>> >>
>> >> Thanks!
>> >>
>> >> Alex
>> >
>> >
>> > The following branch was pushed with the series on top of 
>> > amd-staging-drm-next
>> >
>> > https://github.com/maurossi/linux/commits/kernel-5.6_si_drm-next
>>
>> I gave this a quick test on all of the SI asics and the various
>> monitors I had available and it looks good.  A few minor patches I
>> noticed are attached.  If they look good to you, I'll squash them into
>> the series when I commit it.  I've pushed it to my fdo tree as well:
>> https://cgit.freedesktop.org/~agd5f/linux/log/?h=si_dc_support
>>
>> Thanks!
>>
>> Alex
>
>
> The new patches are ok and with the following information about piglit tests,
> the series may be good to go.
>
> I have performed piglit tests on Tahiti HD7950 on kernel 5.8.0-rc6 with AMD 
> DC support for SI
> and comparison with vanilla kernel 5.8.0-rc6
>
> Results are the following
>
> [piglit gpu tests with kernel 5.8.0-rc6-amddcsi]
>
> utente@utente-desktop:~/piglit$ ./piglit run gpu .
> [26714/26714] skip: 1731, pass: 24669, warn: 15, fail: 288, crash: 11
> Thank you for running Piglit!
> Results have been written to /home/utente/piglit
>
> [piglit gpu tests with vanilla 5.8.0-rc6]
>
> utente@utente-desktop:~/piglit$ ./piglit run gpu .
> [26714/26714] skip: 1731, pass: 24673, warn: 13, fail: 283, crash: 14
> Thank you for running Piglit!
> Results have been written to /home/utente/piglit
>
> In the attachment the 

Re: [PATCH 1/2] drm/radeon: switch from 'pci_' to 'dma_' API

2020-07-27 Thread Alex Deucher
Applied.  Thanks!

Alex

On Mon, Jul 27, 2020 at 9:42 AM Christian König
 wrote:
>
> Am 27.07.20 um 12:34 schrieb Christophe JAILLET:
> > The wrappers in include/linux/pci-dma-compat.h should go away.
> >
> > The patch has been generated with the coccinelle script below and has been
> > hand modified to replace GFP_ with a correct flag.
> > It has been compile tested.
> >
> > When memory is allocated in 'radeon_gart_table_ram_alloc()' GFP_KERNEL
> > can be used because its callers already use this flag.
> >
> > Both 'r100_pci_gart_init()' (r100.c) and 'rs400_gart_init()' (rs400.c)
> > call 'radeon_gart_init()'.
> > This function uses 'vmalloc'.
> >
> >
> > @@
> > @@
> > -PCI_DMA_BIDIRECTIONAL
> > +DMA_BIDIRECTIONAL
> >
> > @@
> > @@
> > -PCI_DMA_TODEVICE
> > +DMA_TO_DEVICE
> >
> > @@
> > @@
> > -PCI_DMA_FROMDEVICE
> > +DMA_FROM_DEVICE
> >
> > @@
> > @@
> > -PCI_DMA_NONE
> > +DMA_NONE
> >
> > @@
> > expression e1, e2, e3;
> > @@
> > -pci_alloc_consistent(e1, e2, e3)
> > +dma_alloc_coherent(>dev, e2, e3, GFP_)
> >
> > @@
> > expression e1, e2, e3;
> > @@
> > -pci_zalloc_consistent(e1, e2, e3)
> > +dma_alloc_coherent(>dev, e2, e3, GFP_)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_free_consistent(e1, e2, e3, e4)
> > +dma_free_coherent(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_map_single(e1, e2, e3, e4)
> > +dma_map_single(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_unmap_single(e1, e2, e3, e4)
> > +dma_unmap_single(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4, e5;
> > @@
> > -pci_map_page(e1, e2, e3, e4, e5)
> > +dma_map_page(>dev, e2, e3, e4, e5)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_unmap_page(e1, e2, e3, e4)
> > +dma_unmap_page(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_map_sg(e1, e2, e3, e4)
> > +dma_map_sg(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_unmap_sg(e1, e2, e3, e4)
> > +dma_unmap_sg(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
> > +dma_sync_single_for_cpu(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_dma_sync_single_for_device(e1, e2, e3, e4)
> > +dma_sync_single_for_device(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
> > +dma_sync_sg_for_cpu(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2, e3, e4;
> > @@
> > -pci_dma_sync_sg_for_device(e1, e2, e3, e4)
> > +dma_sync_sg_for_device(>dev, e2, e3, e4)
> >
> > @@
> > expression e1, e2;
> > @@
> > -pci_dma_mapping_error(e1, e2)
> > +dma_mapping_error(>dev, e2)
> >
> > @@
> > expression e1, e2;
> > @@
> > -pci_set_dma_mask(e1, e2)
> > +dma_set_mask(>dev, e2)
> >
> > @@
> > expression e1, e2;
> > @@
> > -pci_set_consistent_dma_mask(e1, e2)
> > +dma_set_coherent_mask(>dev, e2)
> >
> > Signed-off-by: Christophe JAILLET 
>
> Reviewed-by: Christian König 
>
> > ---
> > If needed, see post from Christoph Hellwig on the kernel-janitors ML:
> > https://marc.info/?l=kernel-janitors=158745678307186=4
> > ---
> >   drivers/gpu/drm/radeon/radeon_gart.c | 9 -
> >   1 file changed, 4 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
> > b/drivers/gpu/drm/radeon/radeon_gart.c
> > index f178ba321715..b7ce254e5663 100644
> > --- a/drivers/gpu/drm/radeon/radeon_gart.c
> > +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> > @@ -72,8 +72,8 @@ int radeon_gart_table_ram_alloc(struct radeon_device 
> > *rdev)
> >   {
> >   void *ptr;
> >
> > - ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
> > ->gart.table_addr);
> > + ptr = dma_alloc_coherent(>pdev->dev, rdev->gart.table_size,
> > +  >gart.table_addr, GFP_KERNEL);
> >   if (ptr == NULL) {
> >   return -ENOMEM;
> >   }
> > @@ -110,9 +110,8 @@ void radeon_gart_table_ram_free(struct radeon_device 
> > *rdev)
> > rdev->gart.table_size >> PAGE_SHIFT);
> >   }
> >   #endif
> > - pci_free_consistent(rdev->pdev, rdev->gart.table_size,
> > - (void *)rdev->gart.ptr,
> > - rdev->gart.table_addr);
> > + dma_free_coherent(>pdev->dev, rdev->gart.table_size,
> > +   (void *)rdev->gart.ptr, rdev->gart.table_addr);
> >   rdev->gart.ptr = NULL;
> >   rdev->gart.table_addr = 0;
> >   }
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org

Re: [PATCH 2/2] drm/radeon: avoid a useless memset

2020-07-27 Thread Alex Deucher
Applied.  Thanks!

Alex

On Mon, Jul 27, 2020 at 9:41 AM Christian König
 wrote:
>
> Am 27.07.20 um 12:34 schrieb Christophe JAILLET:
> > Avoid a memset after a call to 'dma_alloc_coherent()'.
> > This is useless since
> > commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*")
> >
> > Signed-off-by: Christophe JAILLET 
>
> Reviewed-by: Christian König 
>
> > ---
> >   drivers/gpu/drm/radeon/radeon_gart.c | 1 -
> >   1 file changed, 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
> > b/drivers/gpu/drm/radeon/radeon_gart.c
> > index b7ce254e5663..3808a753127b 100644
> > --- a/drivers/gpu/drm/radeon/radeon_gart.c
> > +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> > @@ -85,7 +85,6 @@ int radeon_gart_table_ram_alloc(struct radeon_device 
> > *rdev)
> >   }
> >   #endif
> >   rdev->gart.ptr = ptr;
> > - memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
> >   return 0;
> >   }
> >
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/si: initial support for GPU reset

2020-07-27 Thread Alex Deucher
Ported from radeon.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/si.c | 92 -
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 1b449291f068..a7a45f06c8f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1215,10 +1215,98 @@ static bool si_read_bios_from_rom(struct amdgpu_device 
*adev,
return true;
 }
 
-//xxx: not implemented
+static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
+{
+   u32 tmp, i;
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_BYPASS_EN;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+   tmp |= SPLL_CTLREQ_CHG;
+   WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+   break;
+   udelay(1);
+   }
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+   tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
+   WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+   tmp = RREG32(MPLL_CNTL_MODE);
+   tmp &= ~MPLL_MCLK_SEL;
+   WREG32(MPLL_CNTL_MODE, tmp);
+}
+
+static void si_spll_powerdown(struct amdgpu_device *adev)
+{
+   u32 tmp;
+
+   tmp = RREG32(SPLL_CNTL_MODE);
+   tmp |= SPLL_SW_DIR_CONTROL;
+   WREG32(SPLL_CNTL_MODE, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_RESET;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(CG_SPLL_FUNC_CNTL);
+   tmp |= SPLL_SLEEP;
+   WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+   tmp = RREG32(SPLL_CNTL_MODE);
+   tmp &= ~SPLL_SW_DIR_CONTROL;
+   WREG32(SPLL_CNTL_MODE, tmp);
+}
+
+static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
+{
+   u32 i;
+   int r = -EINVAL;
+
+   dev_info(adev->dev, "GPU pci config reset\n");
+
+   /* set mclk/sclk to bypass */
+   si_set_clk_bypass_mode(adev);
+   /* powerdown spll */
+   si_spll_powerdown(adev);
+   /* disable BM */
+   pci_clear_master(adev->pdev);
+   /* reset */
+   amdgpu_device_pci_config_reset(adev);
+
+   udelay(100);
+
+   /* wait for asic to come out of reset */
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (RREG32(mmCONFIG_MEMSIZE) != 0x) {
+   /* enable BM */
+   pci_set_master(adev->pdev);
+   adev->has_hw_reset = true;
+   r = 0;
+   break;
+   }
+   udelay(1);
+   }
+
+   return r;
+}
+
 static int si_asic_reset(struct amdgpu_device *adev)
 {
-   return 0;
+   int r;
+
+   amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+   r = si_gpu_pci_config_reset(adev);
+
+   amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+   return r;
 }
 
 static bool si_asic_supports_baco(struct amdgpu_device *adev)
-- 
2.25.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Please fix your git setup to use your name rather than "root" as the author.

Alex


From: Liu ChengZhe 
Sent: Monday, July 27, 2020 6:57 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Tuikov, Luben ; Koenig, Christian 
; Deucher, Alexander ; 
Xiao, Jack ; Zhang, Hawking ; Xu, 
Feifei ; Wang, Kevin(Yang) ; Yuan, 
Xiaojie ; Liu, Cheng Zhe 
Subject: [PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

From: root 

1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation;
2. Check pointer before release firmware.

Signed-off-by: root 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 40 +
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a053b7af0680..a9481e112cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 psp_memory_training_fini(>psp);
-   release_firmware(adev->psp.sos_fw);
-   adev->psp.sos_fw = NULL;
-   release_firmware(adev->psp.asd_fw);
-   adev->psp.asd_fw = NULL;
-   release_firmware(adev->psp.ta_fw);
-   adev->psp.ta_fw = NULL;
+   if (adev->psp.sos_fw) {
+   release_firmware(adev->psp.sos_fw);
+   adev->psp.sos_fw = NULL;
+   }
+   if (adev->psp.asd_fw) {
+   release_firmware(adev->psp.asd_fw);
+   adev->psp.asd_fw = NULL;
+   }
+   if (adev->psp.ta_fw) {
+   release_firmware(adev->psp.ta_fw);
+   adev->psp.ta_fw = NULL;
+   }

 if (adev->asic_type == CHIP_NAVI10)
 psp_sysfs_fini(adev);
@@ -409,11 +415,33 @@ static int psp_clear_vf_fw(struct psp_context *psp)
 return ret;
 }

+static bool psp_skip_tmr(struct psp_context *psp)
+{
+   bool ret = false;
+
+   switch (psp->adev->asic_type) {
+   case CHIP_NAVI12:
+   case CHIP_SIENNA_CICHLID:
+   ret = true;
+   break;
+   default:
+   return false;
+   }
+
+   return ret;
+}
+
 static int psp_tmr_load(struct psp_context *psp)
 {
 int ret;
 struct psp_gfx_cmd_resp *cmd;

+   /* for Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not setup TMR
+* (already setup by host driver)
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 if (!cmd)
 return -ENOMEM;
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want

2020-07-27 Thread Felix Kuehling
Am 2020-07-27 um 5:26 a.m. schrieb Christian König:
> Am 27.07.20 um 10:21 schrieb Monk Liu:
>> what:
>> KCQ cost many clocks during world switch which impacts a lot to multi-VF
>> performance
>>
>> how:
>> introduce a parameter to control the number of KCQ to avoid performance
>> drop if there is no KCQ needed
>>
>> notes:
>> this parameter only affects gfx 8/9/10
>
> Sounds like a good idea to me, but that needs a different name.
> Outside AMD most people don't know what a KCQ is.
>
> Just use compute queue or similar as name for this.

Just "compute queue" would be confusing for ROCm users. Maybe "legacy
compute queues"?

Regards,
  Felix


>
> Another comment below.
>
>>
>> Signed-off-by: Monk Liu 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27
>> +-
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30
>> +++--
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29
>> ++--
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31
>> +++---
>>   7 files changed, 69 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e97c088..71a3d6a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>>   #ifdef CONFIG_DRM_AMDGPU_CIK
>>   extern int amdgpu_cik_support;
>>   #endif
>> +extern int amdgpu_num_kcq_user_set;
>>     #define AMDGPU_VM_MAX_NUM_CTX    4096
>>   #define AMDGPU_SG_THRESHOLD    (256*1024*1024)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 62ecac9..61c7583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct
>> amdgpu_device *adev)
>>     amdgpu_gmc_tmz_set(adev);
>>   +    if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
>> +    amdgpu_num_kcq_user_set = 8;
>
> This needs a warning or error message if we overwrite invalid user
> provided parameters.
>
> Christian.
>
>> +
>>   return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f..03a94e9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>>   int amdgpu_force_asic_type = -1;
>>   int amdgpu_tmz = 0;
>>   int amdgpu_reset_method = -1; /* auto */
>> +int amdgpu_num_kcq_user_set = 8;
>>     struct amdgpu_mgpu_info mgpu_info = {
>>   .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>>   MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto
>> (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>>   module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>>   +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if
>> set to greater than 8 or less than 0, only affect gfx 8+)");
>> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
>> +
>>   static const struct pci_device_id pciidlist[] = {
>>   #ifdef  CONFIG_DRM_AMDGPU_SI
>>   {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8eff017..0b59049 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -202,7 +202,7 @@ bool
>> amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>>     void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>>   {
>> -    int i, queue, pipe, mec;
>> +    int i, queue, pipe, mec, j = 0;
>>   bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>>     /* policy for amdgpu compute queue ownership */
>> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct
>> amdgpu_device *adev)
>>     if (multipipe_policy) {
>>   /* policy: amdgpu owns the first two queues of the
>> first MEC */
>> -    if (mec == 0 && queue < 2)
>> -    set_bit(i, adev->gfx.mec.queue_bitmap);
>> +    if (mec == 0 && queue < 2) {
>> +    if (j++ < adev->gfx.num_compute_rings)
>> +    set_bit(i, adev->gfx.mec.queue_bitmap);
>> +    else
>> +    break;
>> +    }
>>   } else {
>>   /* policy: amdgpu owns all queues in the first pipe */
>> -    if (mec == 0 && pipe == 0)
>> -    set_bit(i, adev->gfx.mec.queue_bitmap);
>> +    if (mec 

Re: [PATCH] drm/amd/display: Use proper abm/backlight functions for DCN3

2020-07-27 Thread Kazlauskas, Nicholas

On 2020-07-27 11:23 a.m., Alex Deucher wrote:

On Mon, Jul 27, 2020 at 11:22 AM Bhawanpreet Lakha
 wrote:


Use DCN21 functions instead of DCE110

Signed-off-by: Bhawanpreet Lakha 


Acked-by: Alex Deucher 


Reviewed-by: Nicholas Kazlauskas 

Regards,
Nicholas Kazlauskas




---
  drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
index 1b354c219d0a..9afee7160490 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
@@ -26,6 +26,7 @@
  #include "dce110/dce110_hw_sequencer.h"
  #include "dcn10/dcn10_hw_sequencer.h"
  #include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
  #include "dcn30_hwseq.h"

  static const struct hw_sequencer_funcs dcn30_funcs = {
@@ -87,8 +88,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
 .set_flip_control_gsl = dcn20_set_flip_control_gsl,
 .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
 .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations,
-   .set_backlight_level = dce110_set_backlight_level,
-   .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
+   .set_backlight_level = dcn21_set_backlight_level,
+   .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
  };

  static const struct hwseq_private_funcs dcn30_private_funcs = {
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Duncan
On Mon, 27 Jul 2020 10:05:01 -0400
"Kazlauskas, Nicholas"  wrote:

> On 2020-07-27 9:39 a.m., Christian König wrote:
> > Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> >> This patch fixes a race condition that causes a use-after-free
> >> during amdgpu_dm_atomic_commit_tail. This can occur when 2
> >> non-blocking commits are requested and the second one finishes
> >> before the first. Essentially, this bug occurs when the following
> >> sequence of events happens:
> >>
> >> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> >> deferred to the workqueue.
> >>
> >> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> >> deferred to the workqueue.
> >>
> >> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> >> commit_tail and commit #2 completes, freeing dm_state #1.
> >>
> >> 4. Commit #1 starts after commit #2 completes, uses the freed
> >> dm_state 1 and dereferences a freelist pointer while setting the
> >> context.
> > 
> > Well I only have a one mile high view on this, but why don't you
> > let the work items execute in order?
> > 
> > That would be better anyway cause this way we don't trigger a cache
> > line ping pong between CPUs.
> > 
> > Christian.
> 
> We use the DRM helpers for managing drm_atomic_commit_state and those 
> helpers internally push non-blocking commit work into the system
> unbound work queue.
> 
> While we could duplicate a copy of that code with nothing but the 
> workqueue changed that isn't something I'd really like to maintain
> going forward.
> 
> Regards,
> Nicholas Kazlauskas

Additionally, I don't see mentioned on-thread (it's on the bug and now
in the details below) that we're talking multi-monitor, not
single-monitor. Presumably that goes some way toward answering the "why
not force order?" question, considering the outputs may be running at
different refresh frequencies, etc...

All the reports on the bug seem to be multi-monitor (after seeing
multi-monitor/output in several reports I asked if anyone was seeing it
with only one monitor and no answers), and as you commented on the bug
for your proposed patch but seems missing from this one here (different
author/proposal) ...

Commits #1 and #2 don't touch any of the same core DRM objects (CRTCs,
Planes, Connectors) so Commit #2 does not stall for Commit #1. DRM
Private Objects have always been avoided in stall checks, so we have no
safety from DRM core in this regard.

> >>
> >> Since this bug has only been spotted with fast commits, this patch
> >> fixes the bug by clearing the dm_state instead of using the old
> >> dc_state for fast updates. In addition, since dm_state is only
> >> used for its dc_state and amdgpu_dm_atomic_commit_tail will retain
> >> the dc_state if none is found,
> >> removing the dm_state should not have any consequences in fast
> >> updates.
> >>
> >> This use-after-free bug has existed for a while now, but only
> >> caused a noticeable issue starting from 5.7-rc1 due to 3202fa62f
> >> ("slub: relocate freelist pointer to middle of object") moving the
> >> freelist pointer from dm_state->base (which was unused) to
> >> dm_state->context (which is dereferenced).
> >>
> >> Bugzilla: 
> >> https://bugzilla.kernel.org/show_bug.cgi?id=207383 
> >>
> >> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> >> for fast updates")
> >> Reported-by: Duncan <1i5t5.dun...@cox.net>
> >> Signed-off-by: Mazin Rezk 
> >> ---
> >>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> >> ++- 1 file changed, 27 insertions(+), 9
> >> deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> index 86ffa0c2880f..710edc70e37e 100644
> >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
> >> drm_device *dev,
> >>    * the same resource. If we have a new DC context as
> >> part of
> >>    * the DM atomic state from validation we need to free
> >> it and
> >>    * retain the existing one instead.
> >> + *
> >> + * Furthermore, since the DM atomic state only contains
> >> the DC
> >> + * context and can safely be annulled, we can free the
> >> state
> >> + * and clear the associated private object now to free
> >> + * some memory and avoid a possible use-after-free later.
> >>    */
> >> -    struct dm_atomic_state *new_dm_state, *old_dm_state;
> >>
> >> -    new_dm_state = dm_atomic_get_new_state(state);
> >> -    old_dm_state = dm_atomic_get_old_state(state);
> >> +    for (i = 0; i < state->num_private_objs; i++) {
> >> +    struct drm_private_obj *obj =
> >> state->private_objs[i].ptr;
> >>
> >> -    if (new_dm_state && old_dm_state) {
> >> -    if (new_dm_state->context)
> >> -    

Re: [PATCH] drm/amd/display: Use proper abm/backlight functions for DCN3

2020-07-27 Thread Alex Deucher
On Mon, Jul 27, 2020 at 11:22 AM Bhawanpreet Lakha
 wrote:
>
> Use DCN21 functions instead of DCE110
>
> Signed-off-by: Bhawanpreet Lakha 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c 
> b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
> index 1b354c219d0a..9afee7160490 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
> @@ -26,6 +26,7 @@
>  #include "dce110/dce110_hw_sequencer.h"
>  #include "dcn10/dcn10_hw_sequencer.h"
>  #include "dcn20/dcn20_hwseq.h"
> +#include "dcn21/dcn21_hwseq.h"
>  #include "dcn30_hwseq.h"
>
>  static const struct hw_sequencer_funcs dcn30_funcs = {
> @@ -87,8 +88,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
> .set_flip_control_gsl = dcn20_set_flip_control_gsl,
> .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
> .apply_idle_power_optimizations = 
> dcn30_apply_idle_power_optimizations,
> -   .set_backlight_level = dce110_set_backlight_level,
> -   .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
> +   .set_backlight_level = dcn21_set_backlight_level,
> +   .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
>  };
>
>  static const struct hwseq_private_funcs dcn30_private_funcs = {
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Use proper abm/backlight functions for DCN3

2020-07-27 Thread Bhawanpreet Lakha
Use DCN21 functions instead of DCE110

Signed-off-by: Bhawanpreet Lakha 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
index 1b354c219d0a..9afee7160490 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
@@ -26,6 +26,7 @@
 #include "dce110/dce110_hw_sequencer.h"
 #include "dcn10/dcn10_hw_sequencer.h"
 #include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
 #include "dcn30_hwseq.h"
 
 static const struct hw_sequencer_funcs dcn30_funcs = {
@@ -87,8 +88,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations,
-   .set_backlight_level = dce110_set_backlight_level,
-   .set_abm_immediate_disable = dce110_set_abm_immediate_disable,
+   .set_backlight_level = dcn21_set_backlight_level,
+   .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
 };
 
 static const struct hwseq_private_funcs dcn30_private_funcs = {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Kazlauskas, Nicholas

On 2020-07-27 9:39 a.m., Christian König wrote:

Am 27.07.20 um 07:40 schrieb Mazin Rezk:

This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
are requested and the second one finishes before the first. Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.


Well I only have a one mile high view on this, but why don't you let the 
work items execute in order?


That would be better anyway cause this way we don't trigger a cache line 
ping pong between CPUs.


Christian.


We use the DRM helpers for managing drm_atomic_commit_state and those 
helpers internally push non-blocking commit work into the system unbound 
work queue.


While we could duplicate a copy of that code with nothing but the 
workqueue changed that isn't something I'd really like to maintain going 
forward.


Regards,
Nicholas Kazlauskas





Since this bug has only been spotted with fast commits, this patch fixes
the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is 
found,

removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: 
https://bugzilla.kernel.org/show_bug.cgi?id=207383 

Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for 
fast updates")

Reported-by: Duncan <1i5t5.dun...@cox.net>
Signed-off-by: Mazin Rezk 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
  1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
drm_device *dev,

   * the same resource. If we have a new DC context as part of
   * the DM atomic state from validation we need to free it and
   * retain the existing one instead.
+ *
+ * Furthermore, since the DM atomic state only contains the DC
+ * context and can safely be annulled, we can free the state
+ * and clear the associated private object now to free
+ * some memory and avoid a possible use-after-free later.
   */
-    struct dm_atomic_state *new_dm_state, *old_dm_state;

-    new_dm_state = dm_atomic_get_new_state(state);
-    old_dm_state = dm_atomic_get_old_state(state);
+    for (i = 0; i < state->num_private_objs; i++) {
+    struct drm_private_obj *obj = state->private_objs[i].ptr;

-    if (new_dm_state && old_dm_state) {
-    if (new_dm_state->context)
-    dc_release_state(new_dm_state->context);
+    if (obj->funcs == adev->dm.atomic_obj.funcs) {
+    int j = state->num_private_objs-1;

-    new_dm_state->context = old_dm_state->context;
+    dm_atomic_destroy_state(obj,
+    state->private_objs[i].state);
+
+    /* If i is not at the end of the array then the
+ * last element needs to be moved to where i was
+ * before the array can safely be truncated.
+ */
+    if (i != j)
+    state->private_objs[i] =
+    state->private_objs[j];

-    if (old_dm_state->context)
-    dc_retain_state(old_dm_state->context);
+    state->private_objs[j].ptr = NULL;
+    state->private_objs[j].state = NULL;
+    state->private_objs[j].old_state = NULL;
+    state->private_objs[j].new_state = NULL;
+
+    state->num_private_objs = j;
+    break;
+    }
  }
  }

--
2.27.0





___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 1/2] drm/radeon: switch from 'pci_' to 'dma_' API

2020-07-27 Thread Christian König

Am 27.07.20 um 12:34 schrieb Christophe JAILLET:

The wrappers in include/linux/pci-dma-compat.h should go away.

The patch has been generated with the coccinelle script below and has been
hand modified to replace GFP_ with a correct flag.
It has been compile tested.

When memory is allocated in 'radeon_gart_table_ram_alloc()' GFP_KERNEL
can be used because its callers already use this flag.

Both 'r100_pci_gart_init()' (r100.c) and 'rs400_gart_init()' (rs400.c)
call 'radeon_gart_init()'.
This function uses 'vmalloc'.


@@
@@
-PCI_DMA_BIDIRECTIONAL
+DMA_BIDIRECTIONAL

@@
@@
-PCI_DMA_TODEVICE
+DMA_TO_DEVICE

@@
@@
-PCI_DMA_FROMDEVICE
+DMA_FROM_DEVICE

@@
@@
-PCI_DMA_NONE
+DMA_NONE

@@
expression e1, e2, e3;
@@
-pci_alloc_consistent(e1, e2, e3)
+dma_alloc_coherent(>dev, e2, e3, GFP_)

@@
expression e1, e2, e3;
@@
-pci_zalloc_consistent(e1, e2, e3)
+dma_alloc_coherent(>dev, e2, e3, GFP_)

@@
expression e1, e2, e3, e4;
@@
-pci_free_consistent(e1, e2, e3, e4)
+dma_free_coherent(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_map_single(e1, e2, e3, e4)
+dma_map_single(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_single(e1, e2, e3, e4)
+dma_unmap_single(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4, e5;
@@
-pci_map_page(e1, e2, e3, e4, e5)
+dma_map_page(>dev, e2, e3, e4, e5)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_page(e1, e2, e3, e4)
+dma_unmap_page(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_map_sg(e1, e2, e3, e4)
+dma_map_sg(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_sg(e1, e2, e3, e4)
+dma_unmap_sg(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
+dma_sync_single_for_cpu(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_single_for_device(e1, e2, e3, e4)
+dma_sync_single_for_device(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
+dma_sync_sg_for_cpu(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_sg_for_device(e1, e2, e3, e4)
+dma_sync_sg_for_device(>dev, e2, e3, e4)

@@
expression e1, e2;
@@
-pci_dma_mapping_error(e1, e2)
+dma_mapping_error(>dev, e2)

@@
expression e1, e2;
@@
-pci_set_dma_mask(e1, e2)
+dma_set_mask(>dev, e2)

@@
expression e1, e2;
@@
-pci_set_consistent_dma_mask(e1, e2)
+dma_set_coherent_mask(>dev, e2)

Signed-off-by: Christophe JAILLET 


Reviewed-by: Christian König 


---
If needed, see post from Christoph Hellwig on the kernel-janitors ML:
https://marc.info/?l=kernel-janitors=158745678307186=4
---
  drivers/gpu/drm/radeon/radeon_gart.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index f178ba321715..b7ce254e5663 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -72,8 +72,8 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
  {
void *ptr;
  
-	ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,

-  >gart.table_addr);
+   ptr = dma_alloc_coherent(>pdev->dev, rdev->gart.table_size,
+>gart.table_addr, GFP_KERNEL);
if (ptr == NULL) {
return -ENOMEM;
}
@@ -110,9 +110,8 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev)
  rdev->gart.table_size >> PAGE_SHIFT);
}
  #endif
-   pci_free_consistent(rdev->pdev, rdev->gart.table_size,
-   (void *)rdev->gart.ptr,
-   rdev->gart.table_addr);
+   dma_free_coherent(>pdev->dev, rdev->gart.table_size,
+ (void *)rdev->gart.ptr, rdev->gart.table_addr);
rdev->gart.ptr = NULL;
rdev->gart.table_addr = 0;
  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/powerplay: off by one bugs in smu_cmn_to_asic_specific_index()

2020-07-27 Thread Dan Carpenter
These tables have _COUNT number of elements so the comparisons should be
>= instead of > to prevent reading one element beyond the end of the
array.

Fixes: 8264ee69f0d8 ("drm/amd/powerplay: drop unused code")
Signed-off-by: Dan Carpenter 
---
 drivers/gpu/drm/amd/powerplay/smu_cmn.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/smu_cmn.c 
b/drivers/gpu/drm/amd/powerplay/smu_cmn.c
index be4b678d0e60..5c23c44c33bd 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_cmn.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_cmn.c
@@ -166,7 +166,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu,
 
switch (type) {
case CMN2ASIC_MAPPING_MSG:
-   if (index > SMU_MSG_MAX_COUNT ||
+   if (index >= SMU_MSG_MAX_COUNT ||
!smu->message_map)
return -EINVAL;
 
@@ -181,7 +181,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu,
return msg_mapping.map_to;
 
case CMN2ASIC_MAPPING_CLK:
-   if (index > SMU_CLK_COUNT ||
+   if (index >= SMU_CLK_COUNT ||
!smu->clock_map)
return -EINVAL;
 
@@ -192,7 +192,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu,
return mapping.map_to;
 
case CMN2ASIC_MAPPING_FEATURE:
-   if (index > SMU_FEATURE_COUNT ||
+   if (index >= SMU_FEATURE_COUNT ||
!smu->feature_map)
return -EINVAL;
 
@@ -203,7 +203,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu,
return mapping.map_to;
 
case CMN2ASIC_MAPPING_TABLE:
-   if (index > SMU_TABLE_COUNT ||
+   if (index >= SMU_TABLE_COUNT ||
!smu->table_map)
return -EINVAL;
 
@@ -214,7 +214,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu,
return mapping.map_to;
 
case CMN2ASIC_MAPPING_PWR:
-   if (index > SMU_POWER_SOURCE_COUNT ||
+   if (index >= SMU_POWER_SOURCE_COUNT ||
!smu->pwr_src_map)
return -EINVAL;
 
-- 
2.27.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/radeon: avoid a useless memset

2020-07-27 Thread Christian König

Am 27.07.20 um 12:34 schrieb Christophe JAILLET:

Avoid a memset after a call to 'dma_alloc_coherent()'.
This is useless since
commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*")

Signed-off-by: Christophe JAILLET 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/radeon/radeon_gart.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b7ce254e5663..3808a753127b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -85,7 +85,6 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
}
  #endif
rdev->gart.ptr = ptr;
-   memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
return 0;
  }
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Christian König

Am 27.07.20 um 07:40 schrieb Mazin Rezk:

This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
are requested and the second one finishes before the first. Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.


Well I only have a one mile high view on this, but why don't you let the 
work items execute in order?


That would be better anyway cause this way we don't trigger a cache line 
ping pong between CPUs.


Christian.



Since this bug has only been spotted with fast commits, this patch fixes
the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugzilla.kernel.org%2Fshow_bug.cgi%3Fid%3D207383data=02%7C01%7Cchristian.koenig%40amd.com%7C16d6d6d4a02241deb94e08d831efa1bb%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637314252605493548sdata=JuaMFSMTjAVQBBpbXIf2RWj%2BLcx19ki25XLXbr1C6RA%3Dreserved=0
Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast 
updates")
Reported-by: Duncan <1i5t5.dun...@cox.net>
Signed-off-by: Mazin Rezk 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
  1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
 * the same resource. If we have a new DC context as part of
 * the DM atomic state from validation we need to free it and
 * retain the existing one instead.
+*
+* Furthermore, since the DM atomic state only contains the DC
+* context and can safely be annulled, we can free the state
+* and clear the associated private object now to free
+* some memory and avoid a possible use-after-free later.
 */
-   struct dm_atomic_state *new_dm_state, *old_dm_state;

-   new_dm_state = dm_atomic_get_new_state(state);
-   old_dm_state = dm_atomic_get_old_state(state);
+   for (i = 0; i < state->num_private_objs; i++) {
+   struct drm_private_obj *obj = 
state->private_objs[i].ptr;

-   if (new_dm_state && old_dm_state) {
-   if (new_dm_state->context)
-   dc_release_state(new_dm_state->context);
+   if (obj->funcs == adev->dm.atomic_obj.funcs) {
+   int j = state->num_private_objs-1;

-   new_dm_state->context = old_dm_state->context;
+   dm_atomic_destroy_state(obj,
+   state->private_objs[i].state);
+
+   /* If i is not at the end of the array then the
+* last element needs to be moved to where i was
+* before the array can safely be truncated.
+*/
+   if (i != j)
+   state->private_objs[i] =
+   state->private_objs[j];

-   if (old_dm_state->context)
-   dc_retain_state(old_dm_state->context);
+   state->private_objs[j].ptr = NULL;
+   state->private_objs[j].state = NULL;
+   state->private_objs[j].old_state = NULL;
+   state->private_objs[j].new_state = NULL;
+
+   state->num_private_objs = j;
+   

Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Kazlauskas, Nicholas

On 2020-07-27 1:40 a.m., Mazin Rezk wrote:

This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
are requested and the second one finishes before the first. Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.

Since this bug has only been spotted with fast commits, this patch fixes
the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast 
updates")
Reported-by: Duncan <1i5t5.dun...@cox.net>
Signed-off-by: Mazin Rezk 
---
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
  1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
 * the same resource. If we have a new DC context as part of
 * the DM atomic state from validation we need to free it and
 * retain the existing one instead.
+*
+* Furthermore, since the DM atomic state only contains the DC
+* context and can safely be annulled, we can free the state
+* and clear the associated private object now to free
+* some memory and avoid a possible use-after-free later.
 */
-   struct dm_atomic_state *new_dm_state, *old_dm_state;

-   new_dm_state = dm_atomic_get_new_state(state);
-   old_dm_state = dm_atomic_get_old_state(state);
+   for (i = 0; i < state->num_private_objs; i++) {
+   struct drm_private_obj *obj = 
state->private_objs[i].ptr;

-   if (new_dm_state && old_dm_state) {
-   if (new_dm_state->context)
-   dc_release_state(new_dm_state->context);
+   if (obj->funcs == adev->dm.atomic_obj.funcs) {
+   int j = state->num_private_objs-1;

-   new_dm_state->context = old_dm_state->context;
+   dm_atomic_destroy_state(obj,
+   state->private_objs[i].state);
+
+   /* If i is not at the end of the array then the
+* last element needs to be moved to where i was
+* before the array can safely be truncated.
+*/
+   if (i != j)
+   state->private_objs[i] =
+   state->private_objs[j];

-   if (old_dm_state->context)
-   dc_retain_state(old_dm_state->context);
+   state->private_objs[j].ptr = NULL;
+   state->private_objs[j].state = NULL;
+   state->private_objs[j].old_state = NULL;
+   state->private_objs[j].new_state = NULL;
+
+   state->num_private_objs = j;
+   break;
+   }


In the bug report itself I mentioned that I don't really like hacking 
around the DRM core for resolving this patch but to go into more 
specifics, it's really two issues of code maintenance:


1. It's iterating over internal structures and layout of private objects 
in the state and modifying the state. The core doesn't really guarantee 
how these things are going to be laid out and it may change in the future.


2. It's freeing an allocation we 

[PATCH] drm/amdkfd: option to disable system mem limit

2020-07-27 Thread Philip Yang
If multiple processes share system memory through /dev/shm, KFD memory
allocation should not fail when it reaches the system memory limit,
because one copy of the physical system memory is shared by multiple
processes.

Add a module parameter to provide a user option to disable the system
memory limit check, so that multi-process shared-memory applications
can run. By default the system memory limit is on.

Print out a debug message to warn the user if a KFD memory allocation
failed because of the system memory limit.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 9 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088d03b3..3c0d5ecfe0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
 #ifdef CONFIG_HSA_AMD
 extern int sched_policy;
 extern bool debug_evictions;
+extern bool no_system_mem_limit;
 #else
 static const int sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool debug_evictions; /* = false */
+static const bool no_system_mem_limit;
 #endif
 
 extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8703aa1fe4a5..502e8204c012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
 
spin_lock_init(_mem_limit.mem_limit_lock);
-   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+   if (no_system_mem_limit)
+   kfd_mem_limit.max_system_mem_limit = U64_MAX;
+   else
+   kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
 
spin_lock(_mem_limit.mem_limit_lock);
 
+   if (kfd_mem_limit.system_mem_used + system_mem_needed >
+   kfd_mem_limit.max_system_mem_limit)
+   pr_debug("Set no_system_mem_limit if using shared memory\n");
+
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
 kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..e9acd0a9f327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue 
preemption timeout in ms (1
 bool debug_evictions;
 module_param(debug_evictions, bool, 0644);
 MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = 
default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = 
default)");
+
 #endif
 
 /**
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/radeon: avoid a useless memset

2020-07-27 Thread Christophe JAILLET
Avoid a memset after a call to 'dma_alloc_coherent()'.
This is useless since
commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*")

Signed-off-by: Christophe JAILLET 
---
 drivers/gpu/drm/radeon/radeon_gart.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b7ce254e5663..3808a753127b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -85,7 +85,6 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
}
 #endif
rdev->gart.ptr = ptr;
-   memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
return 0;
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/radeon: switch from 'pci_' to 'dma_' API

2020-07-27 Thread Christophe JAILLET
The wrappers in include/linux/pci-dma-compat.h should go away.

The patch has been generated with the coccinelle script below and has been
hand modified to replace GFP_ with a correct flag.
It has been compile tested.

When memory is allocated in 'radeon_gart_table_ram_alloc()' GFP_KERNEL
can be used because its callers already use this flag.

Both 'r100_pci_gart_init()' (r100.c) and 'rs400_gart_init()' (rs400.c)
call 'radeon_gart_init()'.
This function uses 'vmalloc'.


@@
@@
-PCI_DMA_BIDIRECTIONAL
+DMA_BIDIRECTIONAL

@@
@@
-PCI_DMA_TODEVICE
+DMA_TO_DEVICE

@@
@@
-PCI_DMA_FROMDEVICE
+DMA_FROM_DEVICE

@@
@@
-PCI_DMA_NONE
+DMA_NONE

@@
expression e1, e2, e3;
@@
-pci_alloc_consistent(e1, e2, e3)
+dma_alloc_coherent(>dev, e2, e3, GFP_)

@@
expression e1, e2, e3;
@@
-pci_zalloc_consistent(e1, e2, e3)
+dma_alloc_coherent(>dev, e2, e3, GFP_)

@@
expression e1, e2, e3, e4;
@@
-pci_free_consistent(e1, e2, e3, e4)
+dma_free_coherent(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_map_single(e1, e2, e3, e4)
+dma_map_single(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_single(e1, e2, e3, e4)
+dma_unmap_single(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4, e5;
@@
-pci_map_page(e1, e2, e3, e4, e5)
+dma_map_page(>dev, e2, e3, e4, e5)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_page(e1, e2, e3, e4)
+dma_unmap_page(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_map_sg(e1, e2, e3, e4)
+dma_map_sg(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_unmap_sg(e1, e2, e3, e4)
+dma_unmap_sg(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
+dma_sync_single_for_cpu(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_single_for_device(e1, e2, e3, e4)
+dma_sync_single_for_device(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
+dma_sync_sg_for_cpu(>dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-pci_dma_sync_sg_for_device(e1, e2, e3, e4)
+dma_sync_sg_for_device(>dev, e2, e3, e4)

@@
expression e1, e2;
@@
-pci_dma_mapping_error(e1, e2)
+dma_mapping_error(>dev, e2)

@@
expression e1, e2;
@@
-pci_set_dma_mask(e1, e2)
+dma_set_mask(>dev, e2)

@@
expression e1, e2;
@@
-pci_set_consistent_dma_mask(e1, e2)
+dma_set_coherent_mask(>dev, e2)

Signed-off-by: Christophe JAILLET 
---
If needed, see post from Christoph Hellwig on the kernel-janitors ML:
   https://marc.info/?l=kernel-janitors=158745678307186=4
---
 drivers/gpu/drm/radeon/radeon_gart.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index f178ba321715..b7ce254e5663 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -72,8 +72,8 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
 {
void *ptr;
 
-   ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
-  >gart.table_addr);
+   ptr = dma_alloc_coherent(>pdev->dev, rdev->gart.table_size,
+>gart.table_addr, GFP_KERNEL);
if (ptr == NULL) {
return -ENOMEM;
}
@@ -110,9 +110,8 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev)
  rdev->gart.table_size >> PAGE_SHIFT);
}
 #endif
-   pci_free_consistent(rdev->pdev, rdev->gart.table_size,
-   (void *)rdev->gart.ptr,
-   rdev->gart.table_addr);
+   dma_free_coherent(>pdev->dev, rdev->gart.table_size,
+ (void *)rdev->gart.ptr, rdev->gart.table_addr);
rdev->gart.ptr = NULL;
rdev->gart.table_addr = 0;
 }
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: fix PSP autoload twice in FLR

2020-07-27 Thread Liu ChengZhe
The block->status.hw = false assignment will overwrite PSP's previous
hw status, which will cause PSP to execute a resume operation after hw init.

Signed-off-by: Liu ChengZhe 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac97fbd2..88c681957d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2574,6 +2574,10 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_IH,
};
 
+   for (i = 0; i < adev->num_ip_blocks; i++) {
+   adev->ip_blocks[i].status.hw = false;
+   }
+
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
int j;
struct amdgpu_ip_block *block;
@@ -2581,7 +2585,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct 
amdgpu_device *adev)
for (j = 0; j < adev->num_ip_blocks; j++) {
block = >ip_blocks[j];
 
-   block->status.hw = false;
if (block->version->type != ip_order[i] ||
!block->status.valid)
continue;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Mazin Rezk
On Monday, July 27, 2020 1:40 AM, Mazin Rezk  wrote:
> This patch fixes a race condition that causes a use-after-free during
> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> are requested and the second one finishes before the first. Essentially,
> this bug occurs when the following sequence of events happens:
>
> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> deferred to the workqueue.
>
> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> deferred to the workqueue.
>
> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> commit_tail and commit #2 completes, freeing dm_state #1.
>
> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> 1 and dereferences a freelist pointer while setting the context.
>
> Since this bug has only been spotted with fast commits, this patch fixes
> the bug by clearing the dm_state instead of using the old dc_state for
> fast updates. In addition, since dm_state is only used for its dc_state
> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> removing the dm_state should not have any consequences in fast updates.
>
> This use-after-free bug has existed for a while now, but only caused a
> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> freelist pointer to middle of object") moving the freelist pointer from
> dm_state->base (which was unused) to dm_state->context (which is
> dereferenced).
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast 
> updates")
> Reported-by: Duncan <1i5t5.dun...@cox.net>
> Signed-off-by: Mazin Rezk 
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
>  1 file changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 86ffa0c2880f..710edc70e37e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device 
> *dev,
>* the same resource. If we have a new DC context as part of
>* the DM atomic state from validation we need to free it and
>* retain the existing one instead.
> +  *
> +  * Furthermore, since the DM atomic state only contains the DC
> +  * context and can safely be annulled, we can free the state
> +  * and clear the associated private object now to free
> +  * some memory and avoid a possible use-after-free later.
>*/
> - struct dm_atomic_state *new_dm_state, *old_dm_state;
>
> - new_dm_state = dm_atomic_get_new_state(state);
> - old_dm_state = dm_atomic_get_old_state(state);
> + for (i = 0; i < state->num_private_objs; i++) {
> + struct drm_private_obj *obj = 
> state->private_objs[i].ptr;
>
> - if (new_dm_state && old_dm_state) {
> - if (new_dm_state->context)
> - dc_release_state(new_dm_state->context);
> + if (obj->funcs == adev->dm.atomic_obj.funcs) {
> + int j = state->num_private_objs-1;
>
> - new_dm_state->context = old_dm_state->context;
> + dm_atomic_destroy_state(obj,
> + state->private_objs[i].state);
> +
> + /* If i is not at the end of the array then the
> +  * last element needs to be moved to where i was
> +  * before the array can safely be truncated.
> +  */
> + if (i != j)
> + state->private_objs[i] =
> + state->private_objs[j];
>
> - if (old_dm_state->context)
> - dc_retain_state(old_dm_state->context);
> + state->private_objs[j].ptr = NULL;
> + state->private_objs[j].state = NULL;
> + state->private_objs[j].old_state = NULL;
> + state->private_objs[j].new_state = NULL;
> +
> + state->num_private_objs = j;
> + break;
> + }
>   }
>   }
>
> --
> 2.27.0
>

I have tested this on 5.8.0-rc6 w/ an RX 480 for 8 hours and I have not had
the crash described in the Bugzilla thread. I will also be running this
patch on my kernel for the next couple of days to further confirm that this
is working. In addition, I will ask the users in the Bugzilla thread to
test this 

[PATCH] drm/amd/display: Clear dm_state for fast updates

2020-07-27 Thread Mazin Rezk
This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
are requested and the second one finishes before the first. Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
1 and dereferences a freelist pointer while setting the context.

Since this bug has only been spotted with fast commits, this patch fixes
the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast 
updates")
Reported-by: Duncan <1i5t5.dun...@cox.net>
Signed-off-by: Mazin Rezk 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
 * the same resource. If we have a new DC context as part of
 * the DM atomic state from validation we need to free it and
 * retain the existing one instead.
+*
+* Furthermore, since the DM atomic state only contains the DC
+* context and can safely be annulled, we can free the state
+* and clear the associated private object now to free
+* some memory and avoid a possible use-after-free later.
 */
-   struct dm_atomic_state *new_dm_state, *old_dm_state;

-   new_dm_state = dm_atomic_get_new_state(state);
-   old_dm_state = dm_atomic_get_old_state(state);
+   for (i = 0; i < state->num_private_objs; i++) {
+   struct drm_private_obj *obj = 
state->private_objs[i].ptr;

-   if (new_dm_state && old_dm_state) {
-   if (new_dm_state->context)
-   dc_release_state(new_dm_state->context);
+   if (obj->funcs == adev->dm.atomic_obj.funcs) {
+   int j = state->num_private_objs-1;

-   new_dm_state->context = old_dm_state->context;
+   dm_atomic_destroy_state(obj,
+   state->private_objs[i].state);
+
+   /* If i is not at the end of the array then the
+* last element needs to be moved to where i was
+* before the array can safely be truncated.
+*/
+   if (i != j)
+   state->private_objs[i] =
+   state->private_objs[j];

-   if (old_dm_state->context)
-   dc_retain_state(old_dm_state->context);
+   state->private_objs[j].ptr = NULL;
+   state->private_objs[j].state = NULL;
+   state->private_objs[j].old_state = NULL;
+   state->private_objs[j].new_state = NULL;
+
+   state->num_private_objs = j;
+   break;
+   }
}
}

--
2.27.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Liu ChengZhe
From: root 

1. For Navi12, CHIP_SIENNA_CICHLID, skip tmr load operation;
2. Check pointer before release firmware.

Signed-off-by: root 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 40 +
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a053b7af0680..a9481e112cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
psp_memory_training_fini(>psp);
-   release_firmware(adev->psp.sos_fw);
-   adev->psp.sos_fw = NULL;
-   release_firmware(adev->psp.asd_fw);
-   adev->psp.asd_fw = NULL;
-   release_firmware(adev->psp.ta_fw);
-   adev->psp.ta_fw = NULL;
+   if (adev->psp.sos_fw) {
+   release_firmware(adev->psp.sos_fw);
+   adev->psp.sos_fw = NULL;
+   }
+   if (adev->psp.asd_fw) {
+   release_firmware(adev->psp.asd_fw);
+   adev->psp.asd_fw = NULL;
+   }
+   if (adev->psp.ta_fw) {
+   release_firmware(adev->psp.ta_fw);
+   adev->psp.ta_fw = NULL;
+   }
 
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@@ -409,11 +415,33 @@ static int psp_clear_vf_fw(struct psp_context *psp)
return ret;
 }
 
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+   bool ret = false;
+
+   switch (psp->adev->asic_type) {
+   case CHIP_NAVI12:
+   case CHIP_SIENNA_CICHLID:
+   ret = true;
+   break;
+   default:
+   return false;
+   }
+
+   return ret;
+}
+
 static int psp_tmr_load(struct psp_context *psp)
 {
int ret;
struct psp_gfx_cmd_resp *cmd;
 
+   /* for Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not setup TMR
+* (already setup by host driver)
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v2)

2020-07-27 Thread Monk Liu
what:
the MQD's save and restore of kernel compute queues cost lots of clocks
during world switch which impacts a lot to multi-VF performance

how:
introduce a parameter to control the number of kernel compute queues,
to avoid a performance drop if no kernel compute queue is needed

notes:
this parameter only affects gfx 8/9/10

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, so there is no need for the user to configure it through
modprobe in the virtual machine

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
 7 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..18b93ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
+   amdgpu_num_kcq_user_set = 8;
+   dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter 
provided by user\n");
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   

[PATCH] drm amdgpu: Skip tmr load for SRIOV

2020-07-27 Thread Liu ChengZhe
From: root 

1. For Navi12, Navi21, skip tmr load operation;
2. Check pointer before release firmware.

Signed-off-by: root 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 40 +
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a053b7af0680..b0717b16b5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
psp_memory_training_fini(>psp);
-   release_firmware(adev->psp.sos_fw);
-   adev->psp.sos_fw = NULL;
-   release_firmware(adev->psp.asd_fw);
-   adev->psp.asd_fw = NULL;
-   release_firmware(adev->psp.ta_fw);
-   adev->psp.ta_fw = NULL;
+   if (adev->psp.sos_fw) {
+   release_firmware(adev->psp.sos_fw);
+   adev->psp.sos_fw = NULL;
+   }
+   if (adev->psp.asd_fw) {
+   release_firmware(adev->psp.asd_fw);
+   adev->psp.asd_fw = NULL;
+   }
+   if (adev->psp.ta_fw) {
+   release_firmware(adev->psp.ta_fw);
+   adev->psp.ta_fw = NULL;
+   }
 
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@@ -409,11 +415,33 @@ static int psp_clear_vf_fw(struct psp_context *psp)
return ret;
 }
 
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+   bool ret = false;
+
+   switch (psp->adev->asic_type) {
+   case CHIP_NAVI12:
+   case CHIP_SIENNA_CICHLID:
+   ret = true;
+   break;
+   default:
+   return false;
+   }
+
+   return ret;
+}
+
 static int psp_tmr_load(struct psp_context *psp)
 {
int ret;
struct psp_gfx_cmd_resp *cmd;
 
+   /* for Navi12 and Navi21 SRIOV, do not setup TMR
+* (already setup by host driver)
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10

2020-07-27 Thread Christian König

Please use the "git send-email" command to send patches to the mailing list.

Christian.


Am 27.07.20 um 10:32 schrieb Clements, John:


[AMD Public Use]


Submitting patch to enable UMC 8.7 GECC functions in GMC v10


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH -next 1/2] drm: Remove redundant NULL check

2020-07-27 Thread daniel
On Thu, Jul 23, 2020 at 11:27:42AM +0800, Li Heng wrote:
> Fix below warnings reported by coccicheck:
> ./drivers/gpu/drm/drm_drv.c:819:2-7: WARNING: NULL check before some freeing 
> functions is not needed.
> 
> Fixes: 5dad34f3c444 ("drm: Cleanups after drmm_add_final_kfree rollout")
> Signed-off-by: Li Heng 

Queued up, should make it into 5.9 merge window, thanks for your patch.
-Daniel


> ---
>  drivers/gpu/drm/drm_drv.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index bc38322..13068fd 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -815,8 +815,7 @@ static void drm_dev_release(struct kref *ref)
>  
>   drm_managed_release(dev);
>  
> - if (dev->managed.final_kfree)
> - kfree(dev->managed.final_kfree);
> + kfree(dev->managed.final_kfree);
>  }
>  
>  /**
> -- 
> 2.7.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want

2020-07-27 Thread Christian König

Am 27.07.20 um 10:21 schrieb Monk Liu:

what:
KCQ cost many clocks during world switch which impacts a lot to multi-VF
performance

how:
introduce a parameter to control the number of KCQ to avoid a performance
drop if there is no KCQ needed

notes:
this parameter only affects gfx 8/9/10


Sounds like a good idea to me, but that needs a different name. Outside 
AMD most people don't know what a KCQ is.


Just use compute queue or similar as name for this.

Another comment below.



Signed-off-by: Monk Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
  7 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
  #ifdef CONFIG_DRM_AMDGPU_CIK
  extern int amdgpu_cik_support;
  #endif
+extern int amdgpu_num_kcq_user_set;
  
  #define AMDGPU_VM_MAX_NUM_CTX			4096

  #define AMDGPU_SG_THRESHOLD   (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
  
  	amdgpu_gmc_tmz_set(adev);
  
+	if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)

+   amdgpu_num_kcq_user_set = 8;


This needs a warning or error message if we overwrite invalid user 
provided parameters.


Christian.


+
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
  int amdgpu_force_asic_type = -1;
  int amdgpu_tmz = 0;
  int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
  
  struct amdgpu_mgpu_info mgpu_info = {

.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 
1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
  
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");

+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
  static const struct pci_device_id pciidlist[] = {
  #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
  
  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)

  {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
  
  	/* policy for amdgpu compute queue ownership */

@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
  
  		if (multipipe_policy) {

/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+

RE: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10

2020-07-27 Thread Chen, Guchun
[AMD Public Use]

One typo in commit subject.

add support for umc 8.7 initialzation and RAS interrupt

s/initialzation /initialization

With this fixed, the patch is:
Reviewed-by: Guchun Chen guchun.c...@amd.com

Regards,
Guchun

From: amd-gfx  On Behalf Of Clements, 
John
Sent: Monday, July 27, 2020 4:32 PM
To: amd-gfx list ; Zhang, Hawking 

Subject: [PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10


[AMD Public Use]

Submitting patch to enable UMC 8.7 GECC functions in GMC v10
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: skip crit temperature values on APU

2020-07-27 Thread Wang, Kevin(Yang)
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Kevin Wang 

Best Regards,
Kevin

From: amd-gfx  on behalf of Huang Rui 

Sent: Monday, July 27, 2020 4:23 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Huang, Ray 
Subject: [PATCH] drm/amdgpu: skip crit temperature values on APU

The PPTable descriptor is not exposed on the APU platform, so max/min
temperature values cannot be obtained on APU platforms.

Signed-off-by: Huang Rui 
---

This patch needs to be backported to the stable tree.

Thanks,
Ray

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 5f20cadee343..65ddf575ccbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -3212,6 +3212,12 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
  attr == _dev_attr_fan1_enable.dev_attr.attr))
 return 0;

+   /* Skip crit temp on APU */
+   if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_RV) &&
+   (attr == _dev_attr_temp1_crit.dev_attr.attr ||
+attr == _dev_attr_temp1_crit_hyst.dev_attr.attr))
+   return 0;
+
 /* Skip limit attributes if DPM is not enabled */
 if (!adev->pm.dpm_enabled &&
 (attr == _dev_attr_temp1_crit.dev_attr.attr ||
--
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7CKevin1.Wang%40amd.com%7C73c50b729f024e67b23c08d832067d7c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637314350778753140sdata=D3wb3uQxXJJkj%2Bb4XuBxrb2oYnTUq9jHQNiAe4ctSlU%3Dreserved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: enable umc 8.7 functions in gmc v10

2020-07-27 Thread Clements, John
[AMD Public Use]

Submitting patch to enable UMC 8.7 GECC functions in GMC v10


0001-drm-amdgpu-enable-umc-8.7-functions-in-gmc-v10.patch
Description: 0001-drm-amdgpu-enable-umc-8.7-functions-in-gmc-v10.patch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: skip crit temperature values on APU

2020-07-27 Thread Huang Rui
The PPTable descriptor is not exposed on the APU platform, so max/min
temperature values cannot be obtained on APU platforms.

Signed-off-by: Huang Rui 
---

This patch needs to be backported to the stable tree.

Thanks,
Ray

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 5f20cadee343..65ddf575ccbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -3212,6 +3212,12 @@ static umode_t hwmon_attributes_visible(struct kobject 
*kobj,
 attr == _dev_attr_fan1_enable.dev_attr.attr))
return 0;
 
+   /* Skip crit temp on APU */
+   if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_RV) &&
+   (attr == _dev_attr_temp1_crit.dev_attr.attr ||
+attr == _dev_attr_temp1_crit_hyst.dev_attr.attr))
+   return 0;
+
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
(attr == _dev_attr_temp1_crit.dev_attr.attr ||
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want

2020-07-27 Thread Monk Liu
what:
KCQ costs many clock cycles during world switch, which significantly
impacts multi-VF performance

how:
introduce a parameter to control the number of KCQ to avoid a performance
drop if there is no KCQ needed

notes:
this parameter only affects gfx 8/9/10

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 27 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 29 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 31 +++---
 7 files changed, 69 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..61c7583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
amdgpu_gmc_tmz_set(adev);
 
+   if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
+   amdgpu_num_kcq_user_set = 8;
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-   int i, queue, pipe, mec;
+   int i, queue, pipe, mec, j = 0;
bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
/* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
if (multipipe_policy) {
/* policy: amdgpu owns the first two queues of the 
first MEC */
-   if (mec == 0 && queue < 2)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && queue < 2) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
} else {
/* policy: amdgpu owns all queues in the first pipe */
-   if (mec == 0 && pipe == 0)
-   set_bit(i, adev->gfx.mec.queue_bitmap);
+   if (mec == 0 && pipe == 0) {
+   if (j++ < adev->gfx.num_compute_rings)
+   set_bit(i, adev->gfx.mec.queue_bitmap);
+   else
+   break;
+   }
}
}
 
-   /* update the number of active compute rings */
-   adev->gfx.num_compute_rings =
-   bitmap_weight(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);
-
-   /* If you hit this case and edited the policy, you probably just
-* need to increase