[PATCH] drm/amdgpu: convert bios_hardcoded_edid to drm_edid

2024-06-17 Thread Thomas Weißschuh
Instead of manually passing around 'struct edid *' and its size,
use 'struct drm_edid', which encapsulates a validated combination of
both.

As the drm_edid_ can handle NULL gracefully, the explicit checks can be
dropped.

Also save a few characters by transforming '&array[0]' to the equivalent
'array' and using 'max_t(int, ...)' instead of manual casts.

Signed-off-by: Thomas Weißschuh 
---
While this patch introduces a new user for drm_edid_raw(),
if amdgpu proper gets migrated to 'struct drm_edid', that usage will go
away.

This is only compile-tested.

I have some more patches for the rest of amdgpu,
to move to 'struct drm_edid'.
This patch is a test-balloon for the general idea.

The same can also be done for drm/radeon.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 21 +++--
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  |  2 +-
 8 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 9caba10315a8..f1b11b27cce0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -265,11 +265,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector 
*connector)
 static struct edid *
 amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
 {
-   if (adev->mode_info.bios_hardcoded_edid) {
-   return kmemdup((unsigned char 
*)adev->mode_info.bios_hardcoded_edid,
-  adev->mode_info.bios_hardcoded_edid_size, 
GFP_KERNEL);
-   }
-   return NULL;
+   return 
drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
 }
 
 static void amdgpu_connector_get_edid(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 1fe21a70ddd0..928ac3f1e2ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -51,6 +51,7 @@ struct amdgpu_encoder;
 struct amdgpu_router;
 struct amdgpu_hpd;
 struct edid;
+struct drm_edid;
 
 #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
 #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -325,8 +326,7 @@ struct amdgpu_mode_info {
/* FMT dithering */
struct drm_property *dither_property;
/* hardcoded DFP edid from BIOS */
-   struct edid *bios_hardcoded_edid;
-   int bios_hardcoded_edid_size;
+   const struct drm_edid *bios_hardcoded_edid;
 
/* firmware flags */
u32 firmware_flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index e30eecd02ae1..543275db8302 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -539,7 +539,7 @@ static int amdgpu_vkms_sw_fini(void *handle)
 
adev->mode_info.mode_config_initialized = false;
 
-   kfree(adev->mode_info.bios_hardcoded_edid);
+   drm_edid_free(adev->mode_info.bios_hardcoded_edid);
kfree(adev->amdgpu_vkms_output);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 25feab188dfe..90383094ed1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -2064,20 +2064,13 @@ amdgpu_atombios_encoder_get_lcd_info(struct 
amdgpu_encoder *encoder)
case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
fake_edid_record = 
(ATOM_FAKE_EDID_PATCH_RECORD *)record;
if (fake_edid_record->ucFakeEDIDLength) 
{
-   struct edid *edid;
-   int edid_size =
-   max((int)EDID_LENGTH, 
(int)fake_edid_record->ucFakeEDIDLength);
-   edid = kmalloc(edid_size, 
GFP_KERNEL);
-   if (edid) {
-   memcpy((u8 *)edid, (u8 
*)&fake_edid_record->ucFakeEDIDString[0],
-  
fake_edid_record->ucFakeEDIDLength);
-
-   if 
(drm_edid_is_valid(edid)) {
-   
adev->mode_info.bios_hardcoded_edid = edid;
-   
adev-

[bug report] drm/amdgpu: add init support for GFX11 (v2)

2024-06-17 Thread Dan Carpenter
Hello Hawking Zhang,

Commit 3d879e81f0f9 ("drm/amdgpu: add init support for GFX11 (v2)")
from Apr 13, 2022 (linux-next), leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:4503 gfx_v11_0_hw_init()
error: we previously assumed 'adev->gfx.imu.funcs' could be null (see 
line 4497)

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
4491 static int gfx_v11_0_hw_init(void *handle)
4492 {
4493 int r;
4494 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4495 
4496 if (adev->firmware.load_type == 
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4497 if (adev->gfx.imu.funcs) {
 ^^^
Check for NULL

4498 /* RLC autoload sequence 1: Program rlc ram */
4499 if (adev->gfx.imu.funcs->program_rlc_ram)
4500 
adev->gfx.imu.funcs->program_rlc_ram(adev);
4501 }
4502 /* rlc autoload firmware */
--> 4503 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);

Unchecked dereference inside the function.  (Probably just delete the
NULL check?)

4504 if (r)
4505 return r;
4506 } else {

regards,
dan carpenter


Re: [bug report] drm/amdgpu: amdgpu crash on playing videos, linux 6.10-rc

2024-06-17 Thread Winston Ma
I only build the kernel once. I could try but I think you couldn't
expect much from my side.

BTW I installed 6.10-rc4 this morning from Ubuntu mainline
(https://kernel.ubuntu.com/mainline/v6.10-rc4/amd64/) and I couldn't
replicate the video crash problem. Yunchen could you try 6.10-rc4 and
see if you still have the video crash problem?

But I still get the green blocky object when I keep toggling full
screen during youtube watch (Screenshot: https://ibb.co/8Dpdxc3). I
didn't see the green block in 6.9 so it could be another issue.

Thanks and Regards,
Winston


On Sun, Jun 16, 2024 at 12:10 AM Wang Yunchen  wrote:
>
> On Sat, 2024-06-15 at 17:50 +0200, Thorsten Leemhuis wrote:
> > [reply made easier by moving something in the quote]
> >
> > On 12.06.24 18:55, Wang Yunchen wrote:
> > > On Wed, 2024-06-12 at 15:14 +0200, Linux regression tracking (Thorsten
> > > Leemhuis) wrote:
> > > > On 06.06.24 05:06, Winston Ma wrote:
> > > > > Hi I got the same problem on Linux Kernel 6.10-rc2. I got the problem
> > > > > by
> > > > > following the procedure below:
> > > > >
> > > > >  1. Boot Linux Kernel 6.10-rc2
> > > > >  2. Open Firefox (Any browser should work)
> > > > >  3. Open a Youtube Video
> > > > >  4. On the playing video, toggle fullscreen quickly Then after 10-20
> > > > > times of fullscreen toggling, the screen would enter freeze mode.
> > > > > This is the log that I captured using the above method.
> > > >
> > > > Hmm, seems nothing happened here for a while. Could you maybe try to
> > > > bisect this
> > > > (
> > > > https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.ht
> > > > ml
> > > > )?
> > >
> > > It seems that the issue persists on 6.10 rc3.
> >
> > That's good to know, but...
> >
> > > > @amd-gfx devs: Or is this unneeded, as the cause found or maybe even
> > > > fixed meanwhile?
> >
> > ...as there was no reply to that inquiry it seems we really need either
> > you or Winston Ma (or somebody else that is affected we don't yet know
> > about) to perform a git bisection (see the link quoted above) to find
> > the exact change that broke things. Without this it might not be getting
> > fixed.
> >
> > Ciao, Thorsten
> >
> > > > > This is the kernel log
> > > > >
> > > > > 2024-06-06T10:26:40.747576+08:00 kernel: gmc_v10_0_process_interrupt:
> > > > > 6 callbacks suppressed
> > > > > 2024-06-06T10:26:40.747618+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > > pasid:32789)
> > > > > 2024-06-06T10:26:40.747623+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > in process RDD Process pid 39524 thread
> > > > > firefox-bi:cs0 pid 40342
> > > > > 2024-06-06T10:26:40.747625+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:   in page starting at address
> > > > > 0x800106ffe000 from client 0x12 (VMC)
> > > > > 2024-06-06T10:26:40.747628+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x00203811
> > > > > 2024-06-06T10:26:40.747629+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  Faulty UTCL2 client ID: VCN (0x1c)
> > > > > 2024-06-06T10:26:40.747631+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  MORE_FAULTS: 0x1
> > > > > 2024-06-06T10:26:40.747651+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  WALKER_ERROR: 0x0
> > > > > 2024-06-06T10:26:40.747653+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  PERMISSION_FAULTS: 0x1
> > > > > 2024-06-06T10:26:40.747655+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  MAPPING_ERROR: 0x0
> > > > > 2024-06-06T10:26:40.747656+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  RW: 0x0
> > > > > 2024-06-06T10:26:40.747658+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > > pasid:32789)
> > > > > 2024-06-06T10:26:40.747660+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > in process RDD Process pid 39524 thread
> > > > > firefox-bi:cs0 pid 40342
> > > > > 2024-06-06T10:26:40.747662+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:   in page starting at address
> > > > > 0x800106e0 from client 0x12 (VMC)
> > > > > 2024-06-06T10:26:40.747663+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x
> > > > > 2024-06-06T10:26:40.747664+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  Faulty UTCL2 client ID: MP0 (0x0)
> > > > > 2024-06-06T10:26:40.747666+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  MORE_FAULTS: 0x0
> > > > > 2024-06-06T10:26:40.747667+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  WALKER_ERROR: 0x0
> > > > > 2024-06-06T10:26:40.747668+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  PERMISSION_FAULTS: 0x0
> > > > > 2024-06-06T10:26:40.747670+08:00 kernel: amdgpu :03:00.0:
> > > > > amdgpu:  MAPPING_ERROR: 0x0
> > > > > 2024-06-06T10:26:40.747671+08:00 kernel: amdgpu :03:00.0

Re: [bug report] drm/amdgpu: amdgpu crash on playing videos, linux 6.10-rc

2024-06-17 Thread Wang Yunchen
On Sat, 2024-06-15 at 17:50 +0200, Thorsten Leemhuis wrote:
> [reply made easier by moving something in the quote]
> 
> On 12.06.24 18:55, Wang Yunchen wrote:
> > On Wed, 2024-06-12 at 15:14 +0200, Linux regression tracking (Thorsten
> > Leemhuis) wrote:
> > > On 06.06.24 05:06, Winston Ma wrote:
> > > > Hi I got the same problem on Linux Kernel 6.10-rc2. I got the problem
> > > > by
> > > > following the procedure below:
> > > > 
> > > >  1. Boot Linux Kernel 6.10-rc2
> > > >  2. Open Firefox (Any browser should work)
> > > >  3. Open a Youtube Video
> > > >  4. On the playing video, toggle fullscreen quickly Then after 10-20
> > > >     times of fullscreen toggling, the screen would enter freeze mode.
> > > >     This is the log that I captured using the above method.
> > > 
> > > Hmm, seems nothing happened here for a while. Could you maybe try to
> > > bisect this
> > > (
> > > https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.ht
> > > ml
> > > )?
> > 
> > It seems that the issue persists on 6.10 rc3.
> 
> That's good to know, but...
> 
> > > @amd-gfx devs: Or is this unneeded, as the cause found or maybe even
> > > fixed meanwhile?
> 
> ...as there was no reply to that inquiry it seems we really need either
> you or Winston Ma (or somebody else that is affected we don't yet know
> about) to perform a git bisection (see the link quoted above) to find
> the exact change that broke things. Without this it might not be getting
> fixed.
> 
> Ciao, Thorsten
> 
> > > > This is the kernel log
> > > > 
> > > > 2024-06-06T10:26:40.747576+08:00 kernel: gmc_v10_0_process_interrupt:
> > > > 6 callbacks suppressed
> > > > 2024-06-06T10:26:40.747618+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > pasid:32789)
> > > > 2024-06-06T10:26:40.747623+08:00 kernel: amdgpu :03:00.0: amdgpu: 
> > > > in process RDD Process pid 39524 thread
> > > > firefox-bi:cs0 pid 40342
> > > > 2024-06-06T10:26:40.747625+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:   in page starting at address
> > > > 0x800106ffe000 from client 0x12 (VMC)
> > > > 2024-06-06T10:26:40.747628+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x00203811
> > > > 2024-06-06T10:26:40.747629+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  Faulty UTCL2 client ID: VCN (0x1c)
> > > > 2024-06-06T10:26:40.747631+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  MORE_FAULTS: 0x1
> > > > 2024-06-06T10:26:40.747651+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  WALKER_ERROR: 0x0
> > > > 2024-06-06T10:26:40.747653+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  PERMISSION_FAULTS: 0x1
> > > > 2024-06-06T10:26:40.747655+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  MAPPING_ERROR: 0x0
> > > > 2024-06-06T10:26:40.747656+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  RW: 0x0
> > > > 2024-06-06T10:26:40.747658+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > pasid:32789)
> > > > 2024-06-06T10:26:40.747660+08:00 kernel: amdgpu :03:00.0: amdgpu: 
> > > > in process RDD Process pid 39524 thread
> > > > firefox-bi:cs0 pid 40342
> > > > 2024-06-06T10:26:40.747662+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:   in page starting at address
> > > > 0x800106e0 from client 0x12 (VMC)
> > > > 2024-06-06T10:26:40.747663+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x
> > > > 2024-06-06T10:26:40.747664+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  Faulty UTCL2 client ID: MP0 (0x0)
> > > > 2024-06-06T10:26:40.747666+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  MORE_FAULTS: 0x0
> > > > 2024-06-06T10:26:40.747667+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  WALKER_ERROR: 0x0
> > > > 2024-06-06T10:26:40.747668+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  PERMISSION_FAULTS: 0x0
> > > > 2024-06-06T10:26:40.747670+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  MAPPING_ERROR: 0x0
> > > > 2024-06-06T10:26:40.747671+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  RW: 0x0
> > > > 2024-06-06T10:26:40.747674+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > pasid:32789)
> > > > 2024-06-06T10:26:40.747677+08:00 kernel: amdgpu :03:00.0: amdgpu: 
> > > > in process RDD Process pid 39524 thread
> > > > firefox-bi:cs0 pid 40342
> > > > 2024-06-06T10:26:40.747680+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:   in page starting at address
> > > > 0x800106e07000 from client 0x12 (VMC)
> > > > 2024-06-06T10:26:40.747683+08:00 kernel: amdgpu :03:00.0: amdgpu:
> > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x
> > > > 2024-06-06T10:26:40.747686+08:00 kernel: amdgpu :03:00.0:
> > > > amdgpu:  Faulty UTCL2 client ID: MP0 (0x0)
> > > > 2024-06-06T10:

Re: [PATCH] drm/amdgpu: convert bios_hardcoded_edid to drm_edid

2024-06-17 Thread Thomas Weißschuh
On 2024-06-16 11:12:03+, Thomas Weißschuh wrote:
> Instead of manually passing around 'struct edid *' and its size,
> use 'struct drm_edid', which encapsulates a validated combination of
> both.
> 
> As the drm_edid_ can handle NULL gracefully, the explicit checks can be
> dropped.
> 
> Also save a few characters by transforming '&array[0]' to the equivalent
> 'array' and using 'max_t(int, ...)' instead of manual casts.
> 
> Signed-off-by: Thomas Weißschuh 
> ---
> While this patch introduces a new user for drm_edid_raw(),
> if amdgpu proper gets migrated to 'struct drm_edid', that usage will go
> away.
> 
> This is only compile-tested.
> 
> I have some more patches for the rest of amdgpu,
> to move to 'struct drm_edid'.
> This patch is a test-balloon for the general idea.
> 
> The same can also be done for drm/radeon.
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c |  6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h   |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 21 +++--
>  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  |  2 +-
>  8 files changed, 15 insertions(+), 26 deletions(-)


 
> diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
> b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> index 25feab188dfe..90383094ed1e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
> @@ -2064,20 +2064,13 @@ amdgpu_atombios_encoder_get_lcd_info(struct 
> amdgpu_encoder *encoder)
>   case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
>   fake_edid_record = 
> (ATOM_FAKE_EDID_PATCH_RECORD *)record;
>   if (fake_edid_record->ucFakeEDIDLength) 
> {
> - struct edid *edid;
> - int edid_size =
> - max((int)EDID_LENGTH, 
> (int)fake_edid_record->ucFakeEDIDLength);
> - edid = kmalloc(edid_size, 
> GFP_KERNEL);
> - if (edid) {
> - memcpy((u8 *)edid, (u8 
> *)&fake_edid_record->ucFakeEDIDString[0],
> -
> fake_edid_record->ucFakeEDIDLength);
> -
> - if 
> (drm_edid_is_valid(edid)) {
> - 
> adev->mode_info.bios_hardcoded_edid = edid;
> - 
> adev->mode_info.bios_hardcoded_edid_size = edid_size;
> - } else
> - kfree(edid);
> - }
> + const struct drm_edid *edid;
> + edid = 
> drm_edid_alloc(fake_edid_record->ucFakeEDIDString,
> +   
> max_t(int, EDID_LENGTH, fake_edid_record->ucFakeEDIDLength));
> + if (drm_edid_valid(edid))
> + 
> adev->mode_info.bios_hardcoded_edid = edid;
> + else
> + drm_edid_free(edid);

The old code here seems broken in general.
In drivers/gpu/drm/amd/include/atombios.h the comment for ucFakeEDIDLength says:
(I expect the same field in the same struct for amdgpu to have the same 
semantics)

UCHAR ucFakeEDIDLength;   // = 128 means EDID length is 128 bytes, 
otherwise the EDID length = ucFakeEDIDLength*128

So as soon as the EDID from the BIOS has extensions, only the first few
bytes will be copied into the allocated memory. drm_edid_is_valid() will
then read the uninitialized memory and if the "extensions" field ends up
non-zero it will happily "validate" past the allocated buffer.

The new code won't work either but at least it won't read uninitialized
memory nor will it read past the buffer bounds.

>   }
>   record += 
> fake_edid_record->ucFakeEDIDLength ?
> struct_size(fake_edid_record,




Re: [bug report] drm/amdgpu: amdgpu crash on playing videos, linux 6.10-rc

2024-06-17 Thread Thorsten Leemhuis
[reply made easier by moving something in the quote]

On 12.06.24 18:55, Wang Yunchen wrote:
> On Wed, 2024-06-12 at 15:14 +0200, Linux regression tracking (Thorsten 
> Leemhuis) wrote:
>> On 06.06.24 05:06, Winston Ma wrote:
>>> Hi I got the same problem on Linux Kernel 6.10-rc2. I got the problem by
>>> following the procedure below:
>>>
>>>  1. Boot Linux Kernel 6.10-rc2
>>>  2. Open Firefox (Any browser should work)
>>>  3. Open a Youtube Video
>>>  4. On the playing video, toggle fullscreen quickly Then after 10-20
>>>     times of fullscreen toggling, the screen would enter freeze mode.
>>>     This is the log that I captured using the above method.
>>
>> Hmm, seems nothing happened here for a while. Could you maybe try to
>> bisect this
>> (https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.html
>> )?
>
> It seems that the issue persists on 6.10 rc3.

That's good to know, but...

>> @amd-gfx devs: Or is this unneeded, as the cause found or maybe even
>> fixed meanwhile?

...as there was no reply to that inquiry it seems we really need either
you or Winston Ma (or somebody else that is affected we don't yet know
about) to perform a git bisection (see the link quoted above) to find
the exact change that broke things. Without this it might not be getting
fixed.

Ciao, Thorsten

>>> This is the kernel log
>>>
>>> 2024-06-06T10:26:40.747576+08:00 kernel: gmc_v10_0_process_interrupt: 6 
>>> callbacks suppressed
>>> 2024-06-06T10:26:40.747618+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> [mmhub] page fault (src_id:0 ring:8 vmid:2
>>> pasid:32789)
>>> 2024-06-06T10:26:40.747623+08:00 kernel: amdgpu :03:00.0: amdgpu:  in 
>>> process RDD Process pid 39524 thread
>>> firefox-bi:cs0 pid 40342
>>> 2024-06-06T10:26:40.747625+08:00 kernel: amdgpu :03:00.0: amdgpu:   in 
>>> page starting at address
>>> 0x800106ffe000 from client 0x12 (VMC)
>>> 2024-06-06T10:26:40.747628+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> MMVM_L2_PROTECTION_FAULT_STATUS:0x00203811
>>> 2024-06-06T10:26:40.747629+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  Faulty UTCL2 client ID: VCN (0x1c)
>>> 2024-06-06T10:26:40.747631+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MORE_FAULTS: 0x1
>>> 2024-06-06T10:26:40.747651+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  WALKER_ERROR: 0x0
>>> 2024-06-06T10:26:40.747653+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  PERMISSION_FAULTS: 0x1
>>> 2024-06-06T10:26:40.747655+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MAPPING_ERROR: 0x0
>>> 2024-06-06T10:26:40.747656+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  RW: 0x0
>>> 2024-06-06T10:26:40.747658+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> [mmhub] page fault (src_id:0 ring:8 vmid:2
>>> pasid:32789)
>>> 2024-06-06T10:26:40.747660+08:00 kernel: amdgpu :03:00.0: amdgpu:  in 
>>> process RDD Process pid 39524 thread
>>> firefox-bi:cs0 pid 40342
>>> 2024-06-06T10:26:40.747662+08:00 kernel: amdgpu :03:00.0: amdgpu:   in 
>>> page starting at address
>>> 0x800106e0 from client 0x12 (VMC)
>>> 2024-06-06T10:26:40.747663+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> MMVM_L2_PROTECTION_FAULT_STATUS:0x
>>> 2024-06-06T10:26:40.747664+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  Faulty UTCL2 client ID: MP0 (0x0)
>>> 2024-06-06T10:26:40.747666+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MORE_FAULTS: 0x0
>>> 2024-06-06T10:26:40.747667+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  WALKER_ERROR: 0x0
>>> 2024-06-06T10:26:40.747668+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  PERMISSION_FAULTS: 0x0
>>> 2024-06-06T10:26:40.747670+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MAPPING_ERROR: 0x0
>>> 2024-06-06T10:26:40.747671+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  RW: 0x0
>>> 2024-06-06T10:26:40.747674+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> [mmhub] page fault (src_id:0 ring:8 vmid:2
>>> pasid:32789)
>>> 2024-06-06T10:26:40.747677+08:00 kernel: amdgpu :03:00.0: amdgpu:  in 
>>> process RDD Process pid 39524 thread
>>> firefox-bi:cs0 pid 40342
>>> 2024-06-06T10:26:40.747680+08:00 kernel: amdgpu :03:00.0: amdgpu:   in 
>>> page starting at address
>>> 0x800106e07000 from client 0x12 (VMC)
>>> 2024-06-06T10:26:40.747683+08:00 kernel: amdgpu :03:00.0: amdgpu: 
>>> MMVM_L2_PROTECTION_FAULT_STATUS:0x
>>> 2024-06-06T10:26:40.747686+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  Faulty UTCL2 client ID: MP0 (0x0)
>>> 2024-06-06T10:26:40.747688+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MORE_FAULTS: 0x0
>>> 2024-06-06T10:26:40.747691+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  WALKER_ERROR: 0x0
>>> 2024-06-06T10:26:40.747693+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  PERMISSION_FAULTS: 0x0
>>> 2024-06-06T10:26:40.747696+08:00 kernel: amdgpu :03:00.0: amdgpu:   
>>>  MAPPING_ERROR: 0x0
>>> 2024-06-06T10:26:40.747698+08:00

Re: Kernel 5.15.150 black screen with AMD Raven/Picasso GPU

2024-06-17 Thread Armin Wolf

Am 23.05.24 um 18:29 schrieb Greg KH:


On Thu, May 23, 2024 at 05:59:39PM +0200, Armin Wolf wrote:

Am 23.05.24 um 15:13 schrieb Barry Kauler:


On Wed, May 22, 2024 at 12:58 AM Armin Wolf  wrote:

Am 20.05.24 um 18:22 schrieb Alex Deucher:


On Sat, May 18, 2024 at 8:17 PM Armin Wolf  wrote:

Am 17.05.24 um 03:30 schrieb Barry Kauler:


Armin, Yifan, Prike,
I will top-post, so you don't have to scroll down.
After identifying the commit that causes black screen with my gpu, I
posted the result to you guys, on May 9.
It is now May 17 and no reply.
OK, I have now created a patch that reverts Yifan's commit, compiled
5.15.158, and my gpu now works.
Note, the radeon module is not loaded, so it is not a factor.
I'm not a kernel developer. I have identified the culprit and it is up
to you guys to fix it, Yifan especially, as you are the person who has
created the regression.
I will attach my patch.
Regards,
Barry Kauler

Hi,

sorry for not responding to your findings. I normally do not work with GPU 
drivers,
so i hoped one of the amdgpu developers would handle this.

I CCed dri-de...@lists.freedesktop.org and amd-gfx@lists.freedesktop.org so 
that other
amdgpu developers hear from this issue.

Thank you for your persistence in finding the offending commit.

Likely this patch should not have been ported to 5.15 in the first
place.  The IOMMU requirements have been dropped from the driver for
the last few kernel versions so it is no longer relevant on newer
kernels.

Alex

Barry, can you verify that the latest upstream kernel works on your device?
If yes, then the commit itself is ok and just the backporting itself was wrong.

Thanks,
Armin Wolf

Armin,
The unmodified 6.8.1 kernel works ok.
I presume that patch was applied long before 6.8.1 got released and
only got backported to 5.15.x recently.

Regards,
Barry


Great to hear, that means we only have to revert commit 56b522f46681 ("drm/amdgpu: 
init iommu after amdkfd device init")
from the 5.15.y series.

I CCed the stable mailing list so that they can revert the offending commit.

Please submit the patch/revert that you wish to have applied to the tree
so we can have the correct information in it.  I have no idea what to do
here with this deep response thread as-is, sorry.

thanks,

greg k-h


Hi,

the new 5.15.161 kernel finally contains the necessary patch (many thanks to 
the stable team :)).

Barry, can you test this kernel version and report if the issue is now gone?

Thanks,
Armin Wolf



[PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov

2024-06-17 Thread Jane Jian
[WHY]
sriov has the higher bit violation when flushing tlb

[HOW]
normalize the registers to keep lower 16-bit (dword aligned) to avoid higher bit 
violation
RLCG will mask xcd out and always assume it's accessing its own xcd

also fix the typo in sriov_w/rreg:
for KIQ case, use xcc with xcc_id to read and write

Signed-off-by: Jane Jian 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  9 +++--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 ++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 63f2286858c4..d43652a38484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1075,6 +1075,10 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return;
 
+   /* Select lower 16 bits to write in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
true, &rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
@@ -1084,7 +1088,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, value);
else
-   WREG32(offset, value);
+   WREG32_XCC(offset, value, xcc_id);
 }
 
 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
@@ -1095,6 +1099,10 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return 0;
 
+   /* Select lower 16 bits to read in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
false, &rlcg_flag))
return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, 
xcc_id);
@@ -1102,7 +1110,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
else
-   return RREG32(offset);
+   return RREG32_XCC(offset, xcc_id);
 }
 
 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88b4644f8e96..e6c2fcf452d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -853,8 +853,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
-   uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-   uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+   /* Select lower 16 bits to write in local xcc */
+   if (AMDGPU_IS_GFXHUB(vmhub))
+   {
+   req = NORMALIZE_XCC_REG_OFFSET(req);
+   ack = NORMALIZE_XCC_REG_OFFSET(ack);
+   }
 
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
 1 << vmid, inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..9ddf68e7d06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -210,4 +210,6 @@
 #define WREG64_MCA(ext, mca_base, idx, val) \
WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + 
mca_base + (idx * 8), val)
 
+#define NORMALIZE_XCC_REG_OFFSET(offset) (offset & 0x)
+
 #endif
-- 
2.34.1



[PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov

2024-06-17 Thread Jane Jian
[WHY]
sriov has the higher bit violation when flushing tlb

[HOW]
normalize the registers to keep lower 16-bit (dword aligned) to avoid higher bit 
violation
RLCG will mask xcd out and always assume it's accessing its own xcd

also fix the typo in sriov_w/rreg:
for KIQ case, use xcc with xcc_id to read and write

v2
amend some typos

Signed-off-by: Jane Jian 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 ++--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 ++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 63f2286858c4..d43652a38484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1075,6 +1075,10 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return;
 
+   /* Select lower 16 bits to write in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
true, &rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
@@ -1084,7 +1088,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, value);
else
-   WREG32(offset, value);
+   WREG32_XCC(offset, value, xcc_id);
 }
 
 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
@@ -1095,6 +1099,10 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return 0;
 
+   /* Select lower 16 bits to read in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
false, &rlcg_flag))
return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, 
xcc_id);
@@ -1102,7 +1110,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
else
-   return RREG32(offset);
+   return RREG32_XCC(offset, xcc_id);
 }
 
 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88b4644f8e96..5bb275b96e6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
-   uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-   uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+   /* Select lower 16 bits to write in local xcc */
+   if (AMDGPU_IS_GFXHUB(vmhub)) {
+   req = NORMALIZE_XCC_REG_OFFSET(req);
+   ack = NORMALIZE_XCC_REG_OFFSET(ack);
+   }
 
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
 1 << vmid, inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..9ddf68e7d06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -210,4 +210,6 @@
 #define WREG64_MCA(ext, mca_base, idx, val) \
WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + 
mca_base + (idx * 8), val)
 
+#define NORMALIZE_XCC_REG_OFFSET(offset) (offset & 0xFFFF)
+
 #endif
-- 
2.34.1



[PATCH 0/2] Fixes of AMD GFX7/8 hang on Loongson platforms

2024-06-17 Thread Icenowy Zheng
This patchset tries to fix some workaround code in amdgpu/radeon driver,
that makes Loongson 3A+7A platform suffering from GPU crashes.

Icenowy Zheng (2):
  drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content
  drm/radeon: repeat the same EOP packet for EOP workaround on CIK

 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
 drivers/gpu/drm/radeon/cik.c  |  7 ++-
 3 files changed, 11 insertions(+), 20 deletions(-)

-- 
2.45.1



[PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
The duplication of EOP packets for GFX7/8, with the former one having
seq-1 written and the latter one having seq written, seems to confuse some
hardware platforms (e.g. Loongson 7A series PCIe controllers).

Make the content of the duplicated EOP packet the same with the real
one, only masking any possible interrupts.

Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to gfx8 
emit_fence")
Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
Signed-off-by: Icenowy Zheng 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 541dbd70d8c75..778f27f1a34fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2117,9 +2117,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
 {
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
-   /* Workaround for cache flush problems. First send a dummy EOP
-* event down the pipe with seq one below.
-*/
+
+   /* Workaround for cache flush problems, send EOP twice. */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -2127,11 +2126,10 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+   DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));
 
-   /* Then send the real EOP event down the pipe. */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2f0e72caee1af..39a7d60f1fd69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6153,9 +6153,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
-   /* Workaround for cache flush problems. First send a dummy EOP
-* event down the pipe with seq one below.
-*/
+   /* Workaround for cache flush problems, send EOP twice. */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -6164,12 +6162,10 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+ DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));
 
-   /* Then send the real EOP event down the pipe:
-* EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
-- 
2.45.1



[PATCH 2/2] drm/radeon: repeat the same EOP packet for EOP workaround on CIK

2024-06-17 Thread Icenowy Zheng
The first EOP packet with a sequence number of seq-1 seems to confuse
some PCIe hardware (e.g. Loongson 7A PCHs).

Use the real sequence number instead.

Fixes: a9c73a0e022c ("drm/radeon: workaround for CP HW bug on CIK")
Signed-off-by: Icenowy Zheng 
---
 drivers/gpu/drm/radeon/cik.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 8275eeba0b349..9d203054f922a 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3543,9 +3543,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
struct radeon_ring *ring = &rdev->ring[fence->ring];
u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
 
-   /* Workaround for cache flush problems. First send a dummy EOP
-* event down the pipe with seq one below.
-*/
+   /* Workaround for cache flush problems by sending the EOP event twice */
radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -3554,10 +3552,9 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
radeon_ring_write(ring, addr & 0xfffffffc);
radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(1) | INT_SEL(0));
-   radeon_ring_write(ring, fence->seq - 1);
+   radeon_ring_write(ring, fence->seq);
radeon_ring_write(ring, 0);
 
-   /* Then send the real EOP event down the pipe. */
radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
-- 
2.45.1



Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 12:58 schrieb Icenowy Zheng:

The duplication of EOP packets for GFX7/8, with the former one have
seq-1 written and the latter one have seq written, seems to confuse some
hardware platform (e.g. Loongson 7A series PCIe controllers).

Make the content of the duplicated EOP packet the same with the real
one, only masking any possible interrupts.


Well completely NAK to that, exactly that disables the workaround.

The CPU needs to see two different values written here.

Regards,
Christian.



Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to gfx8 
emit_fence")
Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
Signed-off-by: Icenowy Zheng 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
  2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 541dbd70d8c75..778f27f1a34fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2117,9 +2117,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
  {
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
-   /* Workaround for cache flush problems. First send a dummy EOP
-* event down the pipe with seq one below.
-*/
+
+   /* Workaround for cache flush problems, send EOP twice. */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -2127,11 +2126,10 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, addr & 0xfffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+   DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));
  
-	/* Then send the real EOP event down the pipe. */

amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2f0e72caee1af..39a7d60f1fd69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6153,9 +6153,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
  
-	/* Workaround for cache flush problems. First send a dummy EOP

-* event down the pipe with seq one below.
-*/
+   /* Workaround for cache flush problems, send EOP twice. */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |
@@ -6164,12 +6162,10 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct 
amdgpu_ring *ring, u64 addr,
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, addr & 0xfffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+ DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));
  
-	/* Then send the real EOP event down the pipe:

-* EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
 EOP_TC_ACTION_EN |




Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
在 2024-06-17星期一的 14:35 +0200,Christian König写道:
> Am 17.06.24 um 12:58 schrieb Icenowy Zheng:
> > The duplication of EOP packets for GFX7/8, with the former one have
> > seq-1 written and the latter one have seq written, seems to confuse
> > some
> > hardware platform (e.g. Loongson 7A series PCIe controllers).
> > 
> > Make the content of the duplicated EOP packet the same with the
> > real
> > one, only masking any possible interrupts.
> 
> Well completely NAK to that, exactly that disables the workaround.
> 
> The CPU needs to see two different values written here.

Why does the CPU need to see two different values here? Only the second
packet will raise an interrupt before and after applying this patch,
and the first packet's result should just be overriden on ordinary
platforms. The CPU won't see the first one, until it's polling for the
address for a very short interval, so short that the GPU CP couldn't
execute 2 commands.

Or do you mean the GPU needs to see two different values being written,
or they will be merged into only one write request?

Please give out more information about this workaround, otherwise the
GPU hang problem on Loongson platforms will persist.

> 
> Regards,
> Christian.
> 
> > 
> > Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to
> > gfx8 emit_fence")
> > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
> > Signed-off-by: Icenowy Zheng 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
> >   2 files changed, 9 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > index 541dbd70d8c75..778f27f1a34fe 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > @@ -2117,9 +2117,8 @@ static void
> > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
> >   {
> > bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
> > bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
> > -   /* Workaround for cache flush problems. First send a dummy
> > EOP
> > -    * event down the pipe with seq one below.
> > -    */
> > +
> > +   /* Workaround for cache flush problems, send EOP twice. */
> > amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
> > 4));
> > amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
> >  EOP_TC_ACTION_EN |
> > @@ -2127,11 +2126,10 @@ static void
> > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
> >  EVENT_INDEX(5)));
> > amdgpu_ring_write(ring, addr & 0xfffc);
> > amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
> > -   DATA_SEL(1) | INT_SEL(0));
> > -   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
> > -   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
> > +   DATA_SEL(write64bit ? 2 : 1) |
> > INT_SEL(0));
> > +   amdgpu_ring_write(ring, lower_32_bits(seq));
> > +   amdgpu_ring_write(ring, upper_32_bits(seq));
> >   
> > -   /* Then send the real EOP event down the pipe. */
> > amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
> > 4));
> > amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
> >  EOP_TC_ACTION_EN |
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > index 2f0e72caee1af..39a7d60f1fd69 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > @@ -6153,9 +6153,7 @@ static void
> > gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
> > bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
> > bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
> >   
> > -   /* Workaround for cache flush problems. First send a dummy
> > EOP
> > -    * event down the pipe with seq one below.
> > -    */
> > +   /* Workaround for cache flush problems, send EOP twice. */
> > amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
> > 4));
> > amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
> >  EOP_TC_ACTION_EN |
> > @@ -6164,12 +6162,10 @@ static void
> > gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
> >  EVENT_INDEX(5)));
> > amdgpu_ring_write(ring, addr & 0xfffc);
> > amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
> > -   DATA_SEL(1) | INT_SEL(0));
> > -   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
> > -   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
> > + DATA_SEL(write64bit ? 2 : 1) |
> > INT_SEL(0));
> > +   amdgpu_ring_write(ring, lower_32_bits(seq));
> > +   amdgpu_ring_write(ring, upper_32_bits(seq));
> >  

Re: [bug report] drm/amdgpu: amdgpu crash on playing videos, linux 6.10-rc

2024-06-17 Thread Wang Yunchen
On Mon, 2024-06-17 at 06:55 +0800, Winston Ma wrote:
> I only build the kernel once. I could try but I think you couldn't
> expect much from my side.
> 
> BTW I installed 6.10-rc4 this morning from Ubuntu mainline
> (https://kernel.ubuntu.com/mainline/v6.10-rc4/amd64/) and I couldn't
> replicate the video crash problem. Yunchen could you try 6.10-rc4 and
> see if you still have the video crash problem?
> 
> But I still get the green blocky object when I keep toggling full
> screen during youtube watch (Screenshot: https://ibb.co/8Dpdxc3). I
> didn't see the green block in 6.9 so it could be another issue.
> 
> Thanks and Regards,
> Winston
> 
> 
> On Sun, Jun 16, 2024 at 12:10 AM Wang Yunchen  wrote:
> > 
> > On Sat, 2024-06-15 at 17:50 +0200, Thorsten Leemhuis wrote:
> > > [reply made easier by moving something in the quote]
> > > 
> > > On 12.06.24 18:55, Wang Yunchen wrote:
> > > > On Wed, 2024-06-12 at 15:14 +0200, Linux regression tracking (Thorsten
> > > > Leemhuis) wrote:
> > > > > On 06.06.24 05:06, Winston Ma wrote:
> > > > > > Hi I got the same problem on Linux Kernel 6.10-rc2. I got the
> > > > > > problem
> > > > > > by
> > > > > > following the procedure below:
> > > > > > 
> > > > > >  1. Boot Linux Kernel 6.10-rc2
> > > > > >  2. Open Firefox (Any browser should work)
> > > > > >  3. Open a Youtube Video
> > > > > >  4. On the playing video, toggle fullscreen quickly Then after 10-
> > > > > > 20
> > > > > >     times of fullscreen toggling, the screen would enter freeze
> > > > > > mode.
> > > > > >     This is the log that I captured using the above method.
> > > > > 
> > > > > Hmm, seems nothing happened here for a while. Could you maybe try to
> > > > > bisect this
> > > > > (
> > > > > https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.ht
> > > > > ml
> > > > > )?
> > > > 
> > > > It seems that the issue persists on 6.10 rc3.
> > > 
> > > That's good to know, but...
> > > 
> > > > > @amd-gfx devs: Or is this unneeded, as the cause found or maybe even
> > > > > fixed meanwhile?
> > > 
> > > ...as there was no reply to that inquiry it seems we really need either
> > > you or Winston Ma (or somebody else that is affected we don't yet know
> > > about) to perform a git bisection (see the link quoted above) to find
> > > the exact change that broke things. Without this it might not be getting
> > > fixed.
> > > 
> > > Ciao, Thorsten
> > > 
> > > > > > This is the kernel log
> > > > > > 
> > > > > > 2024-06-06T10:26:40.747576+08:00 kernel:
> > > > > > gmc_v10_0_process_interrupt:
> > > > > > 6 callbacks suppressed
> > > > > > 2024-06-06T10:26:40.747618+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > > > pasid:32789)
> > > > > > 2024-06-06T10:26:40.747623+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > in process RDD Process pid 39524 thread
> > > > > > firefox-bi:cs0 pid 40342
> > > > > > 2024-06-06T10:26:40.747625+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:   in page starting at address
> > > > > > 0x800106ffe000 from client 0x12 (VMC)
> > > > > > 2024-06-06T10:26:40.747628+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x00203811
> > > > > > 2024-06-06T10:26:40.747629+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  Faulty UTCL2 client ID: VCN (0x1c)
> > > > > > 2024-06-06T10:26:40.747631+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  MORE_FAULTS: 0x1
> > > > > > 2024-06-06T10:26:40.747651+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  WALKER_ERROR: 0x0
> > > > > > 2024-06-06T10:26:40.747653+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  PERMISSION_FAULTS: 0x1
> > > > > > 2024-06-06T10:26:40.747655+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  MAPPING_ERROR: 0x0
> > > > > > 2024-06-06T10:26:40.747656+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  RW: 0x0
> > > > > > 2024-06-06T10:26:40.747658+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > [mmhub] page fault (src_id:0 ring:8 vmid:2
> > > > > > pasid:32789)
> > > > > > 2024-06-06T10:26:40.747660+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > in process RDD Process pid 39524 thread
> > > > > > firefox-bi:cs0 pid 40342
> > > > > > 2024-06-06T10:26:40.747662+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:   in page starting at address
> > > > > > 0x800106e0 from client 0x12 (VMC)
> > > > > > 2024-06-06T10:26:40.747663+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:
> > > > > > MMVM_L2_PROTECTION_FAULT_STATUS:0x
> > > > > > 2024-06-06T10:26:40.747664+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  Faulty UTCL2 client ID: MP0 (0x0)
> > > > > > 2024-06-06T10:26:40.747666+08:00 kernel: amdgpu :03:00.0:
> > > > > > amdgpu:  MORE_FAULTS: 0x0
> > > > > > 2024-06-06T10:26:40.747667+08:

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 15:03 schrieb Icenowy Zheng:

在 2024-06-17星期一的 14:35 +0200,Christian König写道:

Am 17.06.24 um 12:58 schrieb Icenowy Zheng:

The duplication of EOP packets for GFX7/8, with the former one have
seq-1 written and the latter one have seq written, seems to confuse
some
hardware platform (e.g. Loongson 7A series PCIe controllers).

Make the content of the duplicated EOP packet the same with the
real
one, only masking any possible interrupts.

Well completely NAK to that, exactly that disables the workaround.

The CPU needs to see two different values written here.

Why do the CPU need to see two different values here? Only the second
packet will raise an interrupt before and after applying this patch,
and the first packet's result should just be overriden on ordinary
platforms. The CPU won't see the first one, until it's polling for the
address for a very short interval, so short that the GPU CP couldn't
execute 2 commands.


Yes exactly that. We need to make two writes, one with the old value 
(seq - 1) and a second with the real value (seq).


Otherwise it is possible that a polling CPU would see the sequence 
before the second EOP is issued, which results in an incoherent view of memory.



Or do you mean the GPU needs to see two different values being written,
or they will be merged into only one write request?

Please give out more information about this workaround, otherwise the
GPU hang problem on Loongson platforms will persist.


Well if Loongson can't handle two consecutive write operations to the 
same address with different values then you have a massive platform bug.


That is something which can happen all the time throughout the operation.

Regards,
Christian.




Regards,
Christian.


Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to
gfx8 emit_fence")
Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
Signed-off-by: Icenowy Zheng 
---
   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
   2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 541dbd70d8c75..778f27f1a34fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2117,9 +2117,8 @@ static void
gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
   {
 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
-   /* Workaround for cache flush problems. First send a dummy
EOP
-    * event down the pipe with seq one below.
-    */
+
+   /* Workaround for cache flush problems, send EOP twice. */
 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
4));
 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
  EOP_TC_ACTION_EN |
@@ -2127,11 +2126,10 @@ static void
gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
  EVENT_INDEX(5)));
 amdgpu_ring_write(ring, addr & 0xfffc);
 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+   DATA_SEL(write64bit ? 2 : 1) |
INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));
   
-   /* Then send the real EOP event down the pipe. */

 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
4));
 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
  EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2f0e72caee1af..39a7d60f1fd69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6153,9 +6153,7 @@ static void
gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   
-   /* Workaround for cache flush problems. First send a dummy

EOP
-    * event down the pipe with seq one below.
-    */
+   /* Workaround for cache flush problems, send EOP twice. */
 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP,
4));
 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
  EOP_TC_ACTION_EN |
@@ -6164,12 +6162,10 @@ static void
gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
  EVENT_INDEX(5)));
 amdgpu_ring_write(ring, addr & 0xfffc);
 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x) |
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_

[PATCH AUTOSEL 6.9 38/44] drm/amdgpu/pptable: Fix UBSAN array-index-out-of-bounds

2024-06-17 Thread Sasha Levin
From: Tasos Sahanidis 

[ Upstream commit c6c4dd54012551cce5cde408b35468f2c62b0cce ]

Flexible arrays used [1] instead of []. Replace the former with the latter
to resolve multiple UBSAN warnings observed on boot with a BONAIRE card.

In addition, use the __counted_by attribute where possible to hint the
length of the arrays to the compiler and any sanitizers.

Signed-off-by: Tasos Sahanidis 
Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/include/pptable.h | 91 ++-
 1 file changed, 49 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/pptable.h 
b/drivers/gpu/drm/amd/include/pptable.h
index 2e8e6c9875f6c..f83ace2d7ec30 100644
--- a/drivers/gpu/drm/amd/include/pptable.h
+++ b/drivers/gpu/drm/amd/include/pptable.h
@@ -477,31 +477,30 @@ typedef struct _ATOM_PPLIB_STATE_V2
 } ATOM_PPLIB_STATE_V2;
 
 typedef struct _StateArray{
-//how many states we have 
-UCHAR ucNumEntries;
-
-ATOM_PPLIB_STATE_V2 states[1];
+   //how many states we have
+   UCHAR ucNumEntries;
+
+   ATOM_PPLIB_STATE_V2 states[] /* __counted_by(ucNumEntries) */;
 }StateArray;
 
 
 typedef struct _ClockInfoArray{
-//how many clock levels we have
-UCHAR ucNumEntries;
-
-//sizeof(ATOM_PPLIB_CLOCK_INFO)
-UCHAR ucEntrySize;
-
-UCHAR clockInfo[];
+   //how many clock levels we have
+   UCHAR ucNumEntries;
+
+   //sizeof(ATOM_PPLIB_CLOCK_INFO)
+   UCHAR ucEntrySize;
+
+   UCHAR clockInfo[];
 }ClockInfoArray;
 
 typedef struct _NonClockInfoArray{
+   //how many non-clock levels we have. normally should be same as number 
of states
+   UCHAR ucNumEntries;
+   //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
+   UCHAR ucEntrySize;
 
-//how many non-clock levels we have. normally should be same as number of 
states
-UCHAR ucNumEntries;
-//sizeof(ATOM_PPLIB_NONCLOCK_INFO)
-UCHAR ucEntrySize;
-
-ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[];
+   ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
 }NonClockInfoArray;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
@@ -513,8 +512,10 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table
 {
-UCHAR ucNumEntries;// 
Number of entries.
-ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1]; // 
Dynamically allocate entries.
+   // Number of entries.
+   UCHAR ucNumEntries;
+   // Dynamically allocate entries.
+   ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[] 
__counted_by(ucNumEntries);
 }ATOM_PPLIB_Clock_Voltage_Dependency_Table;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
@@ -529,8 +530,10 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table
 {
-UCHAR ucNumEntries;// 
Number of entries.
-ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1];  // 
Dynamically allocate entries.
+   // Number of entries.
+   UCHAR ucNumEntries;
+   // Dynamically allocate entries.
+   ATOM_PPLIB_Clock_Voltage_Limit_Record entries[] 
__counted_by(ucNumEntries);
 }ATOM_PPLIB_Clock_Voltage_Limit_Table;
 
 union _ATOM_PPLIB_CAC_Leakage_Record
@@ -553,8 +556,10 @@ typedef union _ATOM_PPLIB_CAC_Leakage_Record 
ATOM_PPLIB_CAC_Leakage_Record;
 
 typedef struct _ATOM_PPLIB_CAC_Leakage_Table
 {
-UCHAR ucNumEntries; // 
Number of entries.
-ATOM_PPLIB_CAC_Leakage_Record entries[1];   // 
Dynamically allocate entries.
+   // Number of entries.
+   UCHAR ucNumEntries;
+   // Dynamically allocate entries.
+   ATOM_PPLIB_CAC_Leakage_Record entries[] __counted_by(ucNumEntries);
 }ATOM_PPLIB_CAC_Leakage_Table;
 
 typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
@@ -568,8 +573,10 @@ typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
 
 typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table
 {
-UCHAR ucNumEntries; // 
Number of entries.
-ATOM_PPLIB_PhaseSheddingLimits_Record entries[1];   // 
Dynamically allocate entries.
+   // Number of entries.
+   UCHAR ucNumEntries;
+   // Dynamically allocate entries.
+   ATOM_PPLIB_PhaseSheddingLimits_Record entries[] 
__counted_by(ucNumEntries);
 }ATOM_PPLIB_PhaseSheddingLimits_Table;
 
 typedef struct _VCEClockInfo{
@@ -580,8 +587,8 @@ typedef struct _VCEClockInfo{
 }VCEClockInfo;
 
 typedef struct _VCEClockInfoArray{
-UCHAR ucNumEntries;
-VCEClockInfo entries[1];
+   UCHAR ucNumEntries;
+   VCEClockInfo entries[] __counted_by(ucNumEntries);
 }VCEClockInfoArray;
 
 typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
@@ -592,8 +599,8 @@ typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Li

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
在 2024-06-17星期一的 15:09 +0200,Christian König写道:
> Am 17.06.24 um 15:03 schrieb Icenowy Zheng:
> > 在 2024-06-17星期一的 14:35 +0200,Christian König写道:
> > > Am 17.06.24 um 12:58 schrieb Icenowy Zheng:
> > > > The duplication of EOP packets for GFX7/8, with the former one
> > > > have
> > > > seq-1 written and the latter one have seq written, seems to
> > > > confuse
> > > > some
> > > > hardware platform (e.g. Loongson 7A series PCIe controllers).
> > > > 
> > > > Make the content of the duplicated EOP packet the same with the
> > > > real
> > > > one, only masking any possible interrupts.
> > > Well completely NAK to that, exactly that disables the
> > > workaround.
> > > 
> > > The CPU needs to see two different values written here.
> > Why does the CPU need to see two different values here? Only the
> > second
> > packet will raise an interrupt before and after applying this
> > patch,
> > and the first packet's result should just be overridden on ordinary
> > platforms. The CPU won't see the first one, until it's polling for
> > the
> > address for a very short interval, so short that the GPU CP
> > couldn't
> > execute 2 commands.
> 
> Yes exactly that. We need to make two writes, one with the old value 
> (seq - 1) and a second with the real value (seq).
> 
> Otherwise it is possible that a polling CPU would see the sequence 
> before the second EOP is issued, which results in an incoherent view of
> memory.

In this case shouldn't we write seq-1 before any work, and then write
seq after work, like what is done in Mesa?

As what I see, Mesa uses another command buffer to emit a
EVENT_WRITE_EOP writing 0, and commit this command buffer before the
real command buffer.

> 
> > Or do you mean the GPU needs to see two different values being
> > written,
> > or they will be merged into only one write request?
> > 
> > Please give out more information about this workaround, otherwise
> > the
> > GPU hang problem on Loongson platforms will persist.
> 
> Well if Loongson can't handle two consecutive write operations to the
> same address with different values then you have a massive platform
> bug.

I think the issue is triggered when two consecutive write operations
and one IRQ is present, which is exactly the case of this function.

> 
> That is something which can happen all the time throughout the
> operation.
> 
> Regards,
> Christian.
> 
> > 
> > > Regards,
> > > Christian.
> > > 
> > > > Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to
> > > > gfx8 emit_fence")
> > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
> > > > Signed-off-by: Icenowy Zheng 
> > > > ---
> > > >    drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
> > > >    drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
> > > >    2 files changed, 9 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > index 541dbd70d8c75..778f27f1a34fe 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > @@ -2117,9 +2117,8 @@ static void
> > > > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
> > > > addr,
> > > >    {
> > > >  bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
> > > >  bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
> > > > -   /* Workaround for cache flush problems. First send a
> > > > dummy
> > > > EOP
> > > > -    * event down the pipe with seq one below.
> > > > -    */
> > > > +
> > > > +   /* Workaround for cache flush problems, send EOP twice.
> > > > */
> > > >  amdgpu_ring_write(ring,
> > > > PACKET3(PACKET3_EVENT_WRITE_EOP,
> > > > 4));
> > > >  amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
> > > >   EOP_TC_ACTION_EN |
> > > > @@ -2127,11 +2126,10 @@ static void
> > > > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
> > > > addr,
> > > >   EVENT_INDEX(5)));
> > > >  amdgpu_ring_write(ring, addr & 0xfffc);
> > > >  amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x)
> > > > |
> > > > -   DATA_SEL(1) | INT_SEL(0));
> > > > -   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
> > > > -   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
> > > > +   DATA_SEL(write64bit ? 2 : 1) |
> > > > INT_SEL(0));
> > > > +   amdgpu_ring_write(ring, lower_32_bits(seq));
> > > > +   amdgpu_ring_write(ring, upper_32_bits(seq));
> > > >    
> > > > -   /* Then send the real EOP event down the pipe. */
> > > >  amdgpu_ring_write(ring,
> > > > PACKET3(PACKET3_EVENT_WRITE_EOP,
> > > > 4));
> > > >  amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
> > > >   EOP_TC_ACTION_EN |
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > > 

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 15:43 schrieb Icenowy Zheng:

在 2024-06-17星期一的 15:09 +0200,Christian König写道:

Am 17.06.24 um 15:03 schrieb Icenowy Zheng:

在 2024-06-17星期一的 14:35 +0200,Christian König写道:

Am 17.06.24 um 12:58 schrieb Icenowy Zheng:

The duplication of EOP packets for GFX7/8, with the former one
have
seq-1 written and the latter one have seq written, seems to
confuse
some
hardware platform (e.g. Loongson 7A series PCIe controllers).

Make the content of the duplicated EOP packet the same with the
real
one, only masking any possible interrupts.

Well completely NAK to that, exactly that disables the
workaround.

The CPU needs to see two different values written here.

Why does the CPU need to see two different values here? Only the
second
packet will raise an interrupt before and after applying this
patch,
and the first packet's result should just be overridden on ordinary
platforms. The CPU won't see the first one, until it's polling for
the
address for a very short interval, so short that the GPU CP
couldn't
execute 2 commands.

Yes exactly that. We need to make two writes, one with the old value
(seq - 1) and a second with the real value (seq).

Otherwise it is possible that a polling CPU would see the sequence
before the second EOP is issued, which results in an incoherent view of
memory.

In this case shouldn't we write seq-1 before any work, and then write
seq after work, like what is done in Mesa?


No. This hw workaround requires that two consecutive write operations 
happen directly behind each other on the PCIe bus with two different values.


To make the software logic around that work without any changes we use 
the values seq - 1 and seq because those are guaranteed to be different 
and not trigger any unwanted software behavior.


Only then we can guarantee that we have a coherent view of system memory.


As what I see, Mesa uses another command buffer to emit a
EVENT_WRITE_EOP writing 0, and commit this command buffer before the
real command buffer.


Or do you mean the GPU needs to see two different values being
written,
or they will be merged into only one write request?

Please give out more information about this workaround, otherwise
the
GPU hang problem on Loongson platforms will persist.

Well if Loongson can't handle two consecutive write operations to the
same address with different values then you have a massive platform
bug.

I think the issue is triggered when two consecutive write operations
and one IRQ is present, which is exactly the case of this function.


Well then you have a massive platform bug.

Two consecutive writes to the same bus address are perfectly legal from 
the PCIe specification and can happen all the time, even without this 
specific hw workaround.


Regards,
Christian.




That is something which can happen all the time throughout the
operation.

Regards,
Christian.


Regards,
Christian.


Fixes: bf26da927a1c ("drm/amdgpu: add cache flush workaround to
gfx8 emit_fence")
Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts")
Signed-off-by: Icenowy Zheng 
---
    drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
    drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
    2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 541dbd70d8c75..778f27f1a34fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2117,9 +2117,8 @@ static void
gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
addr,
    {
  bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
  bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
-   /* Workaround for cache flush problems. First send a
dummy
EOP
-    * event down the pipe with seq one below.
-    */
+
+   /* Workaround for cache flush problems, send EOP twice.
*/
  amdgpu_ring_write(ring,
PACKET3(PACKET3_EVENT_WRITE_EOP,
4));
  amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   EOP_TC_ACTION_EN |
@@ -2127,11 +2126,10 @@ static void
gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
addr,
   EVENT_INDEX(5)));
  amdgpu_ring_write(ring, addr & 0xfffc);
  amdgpu_ring_write(ring, (upper_32_bits(addr) & 0x)
|
-   DATA_SEL(1) | INT_SEL(0));
-   amdgpu_ring_write(ring, lower_32_bits(seq - 1));
-   amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+   DATA_SEL(write64bit ? 2 : 1) |
INT_SEL(0));
+   amdgpu_ring_write(ring, lower_32_bits(seq));
+   amdgpu_ring_write(ring, upper_32_bits(seq));

-   /* Then send the real EOP event down the pipe. */

  amdgpu_ring_write(ring,
PACKET3(PACKET3_EVENT_WRITE_EOP,
4));
  amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   EOP_TC_ACTION_EN |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
b/drivers/

RE: [PATCH 00/36] DC Patches June 11, 2024

2024-06-17 Thread Wheeler, Daniel
[Public]

Hi all,

This week this patchset was tested on the following systems:
* Lenovo ThinkBook T13s Gen4 with AMD Ryzen 5 6600U
* MSI Gaming X Trio RX 6800
* Gigabyte Gaming OC RX 7900 XTX

These systems were tested on the following display/connection types:
* eDP, (1080p 60hz [5650U]) (1920x1200 60hz [6600U]) (2560x1600 
120hz[6600U])
* VGA and DVI (1680x1050 60hz [DP to VGA/DVI, USB-C to VGA/DVI])
* DP/HDMI/USB-C (1440p 170hz, 4k 60hz, 4k 144hz, 4k 240hz [Includes 
USB-C to DP/HDMI adapters])
* Thunderbolt (LG Ultrafine 5k)
* MST (Startech MST14DP123DP [DP to 3x DP] and 2x 4k 60Hz displays)
* DSC (with Cable Matters 101075 [DP to 3x DP] with 3x 4k60 displays, 
and HP Hook G2 with 1 4k60 display)
* USB 4 (Kensington SD5700T and 1x 4k 60Hz display)
* PCON (Club3D CAC-1085 and 1x 4k 144Hz display [at 4k 120HZ, as that 
is the max the adapter supports])

The testing is a mix of automated and manual tests. Manual testing includes 
(but is not limited to):
* Changing display configurations and settings
* Benchmark testing
* Feature testing (Freesync, etc.)

Automated testing includes (but is not limited to):
* Script testing (scripts to automate some of the manual checks)
* IGT testing

The patchset consists of the amd-staging-drm-next branch (Head commit - 
0a64705ad8b13bb27909ec47bd6b3a4209eb37bd -> drm/amd/display: Disable PHYSYMCLK 
RCO) with new patches added on top of it.

Tested on Ubuntu 22.04.3, on Wayland and X11, using KDE Plasma and Gnome.


Tested-by: Daniel Wheeler 


Thank you,

Dan Wheeler
Sr. Technologist | AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
amd.com

-Original Message-
From: Mahfooz, Hamza 
Sent: Tuesday, June 11, 2024 12:51 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wheeler, Daniel ; Wentland, Harry 
; Li, Sun peng (Leo) ; Lakha, 
Bhawanpreet ; Siqueira, Rodrigo 
; Pillai, Aurabindo ; Zhuo, 
Lillian ; Li, Roman ; Lin, Wayne 
; Chiu, Solomon ; Gutierrez, Agustin 
; Zuo, Jerry ; Mahfooz, Hamza 

Subject: [PATCH 00/36] DC Patches June 11, 2024

Cc: Daniel Wheeler 

Alex Hung (15):
  drm/amd/display: Explicitly extend unsigned 16 bit to 64 bit
  drm/amd/display: Add null checker before passing variables
  drm/amd/display: Check BIOS images before it is used
  drm/amd/display: Skip wbscl_set_scaler_filter if filter is null
  drm/amd/display: Add null checker before access structs
  drm/amd/display: Check dc_stream_state before it is used
  drm/amd/display: Check pipe_ctx before it is used
  drm/amd/display: Convert integers to double before divisions
  drm/amd/display: Remove redundant checks for res_pool->dccg
  drm/amd/display: Remove redundant checks for ctx->dc_bios
  drm/amd/display: Remove redundant null checks
  drm/amd/display: Remove redundant checks for opp
  drm/amd/display: Remove redundant checks for context
  drm/amd/display: Check UnboundedRequestEnabled's value
  drm/amd/display: Remove redundant null checks

Alvin Lee (1):
  drm/amd/display: Make sure to reprogram ODM when resync fifo

Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.222.0

Aric Cyr (1):
  drm/amd/display: 3.2.289

Chris Park (1):
  drm/amd/display: On clock init, maintain DISPCLK freq

Dillon Varone (2):
  drm/amd/display: Enable DCN401 idle optimizations by default
  drm/amd/display: Add null check to dml21_find_dc_pipes_for_plane

Ivan Lipski (3):
  drm/amd/display: Remove redundant condition with DEADCODE
  drm/amd/display: Remove redundant condition in VBA 314 func
  drm/amd/display: Remove unused value set from 'min_hratio_fact' in dml

Joshua Aberback (3):
  drm/amd/display: DCN401 full power down in HW init if any link enabled
  Revert "drm/amd/display: workaround for oled eDP not lighting up on
DCN401"
  drm/amd/display: Remove duplicate HWSS interfaces

Michael Strauss (1):
  drm/amd/display: Attempt to avoid empty TUs when endpoint is DPIA

Mounika Adhuri (1):
  drm/amd/display: Refactor DCN3X into component folder

Relja Vojvodic (1):
  drm/amd/display: Add dcn401 DIG fifo enable/disable

Rodrigo Siqueira (3):
  drm/amd/display: Fix NULL pointer dereference for DTN log in DCN401
  drm/amd/display: Fix warning caused by an attempt to configure a
non-otg master
  drm/amd/display: Improve warning log for get OPP for OTG master

Sridevi Arvindekar (1):
  drm/amd/display: mirror case cleanup for cursors

Sung Joon Kim (1):
  drm/amd/display: Send message to notify the DPIA host router bandwidth

Wenjing Liu (1):
  drm/amd/display: fix minor coding errors where dml21 phase 5 uses
wrong variables

 drivers/gpu/drm/amd/display/Makefile  |   7 ++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  19 +--
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   2 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
在 2024-06-17星期一的 15:59 +0200,Christian König写道:
> Am 17.06.24 um 15:43 schrieb Icenowy Zheng:
> > 在 2024-06-17星期一的 15:09 +0200,Christian König写道:
> > > Am 17.06.24 um 15:03 schrieb Icenowy Zheng:
> > > > 在 2024-06-17星期一的 14:35 +0200,Christian König写道:
> > > > > Am 17.06.24 um 12:58 schrieb Icenowy Zheng:
> > > > > > The duplication of EOP packets for GFX7/8, with the former
> > > > > > one
> > > > > > have
> > > > > > seq-1 written and the latter one have seq written, seems to
> > > > > > confuse
> > > > > > some
> > > > > > hardware platform (e.g. Loongson 7A series PCIe
> > > > > > controllers).
> > > > > > 
> > > > > > Make the content of the duplicated EOP packet the same with
> > > > > > the
> > > > > > real
> > > > > > one, only masking any possible interrupts.
> > > > > Well completely NAK to that, exactly that disables the
> > > > > workaround.
> > > > > 
> > > > > The CPU needs to see two different values written here.
> > > > Why does the CPU need to see two different values here? Only the
> > > > second
> > > > packet will raise an interrupt before and after applying this
> > > > patch,
> > > > and the first packet's result should just be overriden on
> > > > ordinary
> > > > platforms. The CPU won't see the first one, until it's polling
> > > > for
> > > > the
> > > > address for a very short interval, so short that the GPU CP
> > > > couldn't
> > > > execute 2 commands.
> > > Yes exactly that. We need to make two writes, one with the old
> > > value
> > > (seq - 1) and a second with the real value (seq).
> > > 
> > > Otherwise it is possible that a polling CPU would see the
> > > sequence
> > > before the second EOP is issued, which results in an incoherent view
> > > of
> > > memory.
> > In this case shouldn't we write seq-1 before any work, and then
> > write
> > seq after work, like what is done in Mesa?
> 
> No. This hw workaround requires that two consecutive write operations
> happen directly behind each other on the PCIe bus with two different
> values.

Well to be honest the workaround code in Mesa seems to not be working
in this way ...

> 
> To make the software logic around that work without any changes we
> use 
> the values seq - 1 and seq because those are guaranteed to be
> different 
> and not trigger any unwanted software behavior.
> 
> Only then we can guarantee that we have a coherent view of system
> memory.

Any more details about it?

BTW in this case, could I try to write it for 3 times instead of 2,
with seq-1, seq and seq?

> 
> > As what I see, Mesa uses another command buffer to emit a
> > EVENT_WRITE_EOP writing 0, and commit this command buffer before
> > the
> > real command buffer.
> > 
> > > > Or do you mean the GPU needs to see two different values being
> > > > written,
> > > > or they will be merged into only one write request?
> > > > 
> > > > Please give out more information about this workaround,
> > > > otherwise
> > > > the
> > > > GPU hang problem on Loongson platforms will persist.
> > > Well if Loongson can't handle two consecutive write operations to
> > > the
> > > same address with different values then you have a massive
> > > platform
> > > bug.
> > I think the issue is triggered when two consecutive write
> > operations
> > and one IRQ is present, which is exactly the case of this function.
> 
> Well then you have a massive platform bug.
> 
> Two consecutive writes to the same bus address are perfectly legal
> from 
> the PCIe specification and can happen all the time, even without this
> specific hw workaround.

Yes I know it, and I am not from Loongson, just some user trying to
mess around it.

> 
> Regards,
> Christian.
> 
> > 
> > > That is something which can happen all the time throughout the
> > > operation.
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > > Fixes: bf26da927a1c ("drm/amdgpu: add cache flush
> > > > > > workaround to
> > > > > > gfx8 emit_fence")
> > > > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK
> > > > > > parts")
> > > > > > Signed-off-by: Icenowy Zheng 
> > > > > > ---
> > > > > >     drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
> > > > > >     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
> > > > > >     2 files changed, 9 insertions(+), 15 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > index 541dbd70d8c75..778f27f1a34fe 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > @@ -2117,9 +2117,8 @@ static void
> > > > > > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
> > > > > > addr,
> > > > > >     {
> > > > > >   bool write64bit = flags &
> > > > > > AMDGPU_FENCE_FLAG_64BIT;
> > > > > >   bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
> > > > > > -   /* Workaround for cache flush problems. First send
> > > > > > a
> > > >

Re: [PATCH v3] drm/fb-helper: Detect when lid is closed during initialization

2024-06-17 Thread Daniel Vetter
On Thu, Jun 13, 2024 at 12:17:00AM -0500, Mario Limonciello wrote:
> If the lid on a laptop is closed when eDP connectors are populated
> then it remains enabled when the initial framebuffer configuration
> is built.
> 
> When creating the initial framebuffer configuration detect the
> lid status and if it's closed disable any eDP connectors.
> 
> Also set up a workqueue to monitor for any future lid events.
> 
> Suggested-by: Dmitry Torokhov 
> Reported-by: Chris Bainbridge 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3349
> Signed-off-by: Mario Limonciello 

More fundamental question: Do we even want to solve this issues?

This is fbdev we're talking about, if people use this actually in
production, they don't care much about the fallout.

Plus this has the potential to cause regressions, like we might not be
able to enable edp after having enabled and external screen. Which would
be bad.

Imo this needs a lot more justification than "one random user asked for
it". Especially since it's been the behaviour for like well over a decade
for any integrated panel that we just light them always up when using
fbdev/fbcon.

If people want full control over all this, they can use a display manager
(or a userspace console).

Cheers, Sima
> ---
> v2->v3:
>  * Use input device instead of ACPI device
>  * Detect lid open/close events
> ---
>  drivers/gpu/drm/drm_client_modeset.c |  29 ++
>  drivers/gpu/drm/drm_fb_helper.c  | 132 +++
>  include/drm/drm_device.h |   6 ++
>  include/drm/drm_fb_helper.h  |   2 +
>  4 files changed, 169 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_client_modeset.c 
> b/drivers/gpu/drm/drm_client_modeset.c
> index 31af5cf37a09..b8adfe87334b 100644
> --- a/drivers/gpu/drm/drm_client_modeset.c
> +++ b/drivers/gpu/drm/drm_client_modeset.c
> @@ -257,6 +257,34 @@ static void drm_client_connectors_enabled(struct 
> drm_connector **connectors,
>   enabled[i] = drm_connector_enabled(connectors[i], false);
>  }
>  
> +static void drm_client_match_edp_lid(struct drm_device *dev,
> +  struct drm_connector **connectors,
> +  unsigned int connector_count,
> +  bool *enabled)
> +{
> + int i;
> +
> + for (i = 0; i < connector_count; i++) {
> + struct drm_connector *connector = connectors[i];
> +
> + switch (connector->connector_type) {
> + case DRM_MODE_CONNECTOR_LVDS:
> + case DRM_MODE_CONNECTOR_eDP:
> + if (!enabled[i])
> + continue;
> + break;
> + default:
> + continue;
> + }
> +
> + if (dev->lid_closed) {
> + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] lid is closed, 
> disabling\n",
> + connector->base.id, connector->name);
> + enabled[i] = false;
> + }
> + }
> +}
> +
>  static bool drm_client_target_cloned(struct drm_device *dev,
>struct drm_connector **connectors,
>unsigned int connector_count,
> @@ -844,6 +872,7 @@ int drm_client_modeset_probe(struct drm_client_dev 
> *client, unsigned int width,
>   memset(crtcs, 0, connector_count * sizeof(*crtcs));
>   memset(offsets, 0, connector_count * sizeof(*offsets));
>  
> + drm_client_match_edp_lid(dev, connectors, connector_count, 
> enabled);
>   if (!drm_client_target_cloned(dev, connectors, connector_count, 
> modes,
> offsets, enabled, width, height) 
> &&
>   !drm_client_target_preferred(dev, connectors, 
> connector_count, modes,
> diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
> index d612133e2cf7..41dd5887599a 100644
> --- a/drivers/gpu/drm/drm_fb_helper.c
> +++ b/drivers/gpu/drm/drm_fb_helper.c
> @@ -30,6 +30,8 @@
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -413,6 +415,128 @@ static void drm_fb_helper_damage_work(struct 
> work_struct *work)
>   drm_fb_helper_fb_dirty(helper);
>  }
>  
> +static void drm_fb_helper_lid_event(struct input_handle *handle, unsigned 
> int type,
> + unsigned int code, int value)
> +{
> + if (type == EV_SW && code == SW_LID) {
> + struct drm_fb_helper *fb_helper = handle->handler->private;
> +
> + if (value != fb_helper->dev->lid_closed) {
> + fb_helper->dev->lid_closed = value;
> + queue_work(fb_helper->input_wq, &fb_helper->lid_work);
> + }
> + }
> +}
> +
> +struct drm_fb_lid {
> + struct input_handle handle;
> +};
> +
> +static int drm_fb_helper_lid_connect(struc

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 16:30 schrieb Icenowy Zheng:

在 2024-06-17星期一的 15:59 +0200,Christian König写道:

Am 17.06.24 um 15:43 schrieb Icenowy Zheng:

在 2024-06-17星期一的 15:09 +0200,Christian König写道:

Am 17.06.24 um 15:03 schrieb Icenowy Zheng:

在 2024-06-17星期一的 14:35 +0200,Christian König写道:

Am 17.06.24 um 12:58 schrieb Icenowy Zheng:

The duplication of EOP packets for GFX7/8, with the former
one
have
seq-1 written and the latter one have seq written, seems to
confuse
some
hardware platform (e.g. Loongson 7A series PCIe
controllers).

Make the content of the duplicated EOP packet the same with
the
real
one, only masking any possible interrupts.

Well completely NAK to that, exactly that disables the
workaround.

The CPU needs to see two different values written here.

Why does the CPU need to see two different values here? Only the
second
packet will raise an interrupt before and after applying this
patch,
and the first packet's result should just be overridden on
ordinary
platforms. The CPU won't see the first one, until it's polling
for
the
address for a very short interval, so short that the GPU CP
couldn't
execute 2 commands.

Yes exactly that. We need to make two writes, one with the old
value
(seq - 1) and a second with the real value (seq).

Otherwise it is possible that a polling CPU would see the
sequence
before the second EOP is issued, which results in an incoherent view
of
memory.

In this case shouldn't we write seq-1 before any work, and then
write
seq after work, like what is done in Mesa?

No. This hw workaround requires that two consecutive write operations
happen directly behind each other on the PCIe bus with two different
values.

Well to be honest the workaround code in Mesa seems to not be working
in this way ...


Mesa doesn't have any workaround for that hw issue, the code there uses 
a quite different approach.



To make the software logic around that work without any changes we
use
the values seq - 1 and seq because those are guaranteed to be
different
and not trigger any unwanted software behavior.

Only then we can guarantee that we have a coherent view of system
memory.

Any more details about it?


No, sorry. All I know is that it's a bug in the cache flush logic which 
can be worked around by issuing two write behind each other to the same 
location.



BTW in this case, could I try to write it for 3 times instead of 2,
with seq-1, seq and seq?


That could potentially work as well, but at some point we would need to 
increase the EOP ring buffer size or could run into performance issues.



As what I see, Mesa uses another command buffer to emit a
EVENT_WRITE_EOP writing 0, and commit this command buffer before
the
real command buffer.


Or do you mean the GPU needs to see two different values being
written,
or they will be merged into only one write request?

Please give out more information about this workaround,
otherwise
the
GPU hang problem on Loongson platforms will persist.

Well if Loongson can't handle two consecutive write operations to
the
same address with different values then you have a massive
platform
bug.

I think the issue is triggered when two consecutive write
operations
and one IRQ is present, which is exactly the case of this function.

Well then you have a massive platform bug.

Two consecutive writes to the same bus address are perfectly legal
from
the PCIe specification and can happen all the time, even without this
specific hw workaround.

Yes I know it, and I am not from Loongson, just some user trying to
mess around it.


Well to be honest on a platform where even two consecutive writes to the 
same location doesn't work I would have strong doubts that it is stable 
in general.


Regards,
Christian.

[PATCH 1/2] drm/amdgpu: Use dev_ prints for virtualization as it supports multi adapter

2024-06-17 Thread Vignesh Chander
Signed-off-by: Vignesh Chander 
Change-Id: Ifead637951c00e5b4e97c766d172323dcac4da08
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 19 +++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 23 +++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 6b71ee85ee6556..65656afc6ed1c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -93,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
 
-   pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", 
AI_MAILBOX_POLL_ACK_TIMEDOUT);
+   dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", 
AI_MAILBOX_POLL_ACK_TIMEDOUT);
 
return -ETIME;
 }
@@ -111,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
timeout -= 10;
} while (timeout > 1);
 
-   pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+   dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
 
return -ETIME;
 }
@@ -132,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device 
*adev,
xgpu_ai_mailbox_set_valid(adev, false);
trn = xgpu_ai_peek_ack(adev);
if (trn) {
-   pr_err("trn=%x ACK should not assert! wait again !\n", 
trn);
+   dev_err_ratelimited(adev->dev, "trn=%x ACK should not 
assert! wait again !\n", trn);
msleep(1);
}
} while(trn);
@@ -155,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device 
*adev,
/* start to poll ack */
r = xgpu_ai_poll_ack(adev);
if (r)
-   pr_err("Doesn't get ack from pf, continue\n");
+   dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
 
xgpu_ai_mailbox_set_valid(adev, false);
 }
@@ -173,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct 
amdgpu_device *adev,
req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) {
-   pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give 
up\n");
+   dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU 
from pf, give up\n");
return r;
}
/* Retrieve checksum from mailbox2 */
@@ -231,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device 
*adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
 {
-   DRM_DEBUG("get ack intr and do nothing.\n");
+   dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
 }
 
@@ -258,12 +258,15 @@ static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
 {
int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
do {
-   if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+   if (xgpu_ai_mailbox_peek_msg(adev) == 
IDH_FLR_NOTIFICATION_CMPL) {
+   dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL 
after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
return 0;
+   }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-   dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+
+   dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n");
return -ETIME;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 22af30a15a5fd7..17e1e8cc243752 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -91,7 +91,7 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
 
-   pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", 
NV_MAILBOX_POLL_ACK_TIMEDOUT);
+   dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", 
NV_MAILBOX_POLL_ACK_TIMEDOUT);
 
return -ETIME;
 }
@@ -106,13 +106,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, 
enum idh_event event)
 
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
-   if (!r)
+   if (!r) {
+   dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", 
event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
return 0;
+   }
 
msleep(10);
now = (uint64_t)ktime_to_ms(ktime_get());
} while (timeout > now);
 
+   dev_dbg(adev->dev, "nv_poll_msg timed out\n");
 
return -ETIME;
 }
@@ -133,11 +136,12 @@ static void xgpu_nv_mailbox_trans_msg (struct 
amdgp

[PATCH 2/2] drm/amdgpu: process RAS fatal error MB notification

2024-06-17 Thread Vignesh Chander
For the RAS error scenario, the VF guest driver will check the mailbox
and set the FED flag to avoid unnecessary HW accesses.
Additionally, poll for the reset completion message first
to avoid accidentally spamming multiple reset requests to the host.

Signed-off-by: Vignesh Chander 
Change-Id: I364c417ce292bec6bf98671bc03ea882529361ac
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 25 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  4 +++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c| 22 +++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h|  4 +++-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c| 22 +++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h|  3 ++-
 6 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 63f2286858c484..ccb3d041c2b249 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -229,6 +229,22 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
adev->virt.mm_table.gpu_addr = 0;
 }
 
+/**
+ * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt
+ * @adev:  amdgpu device.
+ * Check whether host sent RAS error message
+ * Return: true if found, otherwise false
+ */
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
+{
+   struct amdgpu_virt *virt = &adev->virt;
+
+   if (!virt->ops || !virt->ops->rcvd_ras_intr)
+   return false;
+
+   return virt->ops->rcvd_ras_intr(adev);
+}
+
 
 unsigned int amd_sriov_msg_checksum(void *obj,
unsigned long obj_size,
@@ -612,11 +628,14 @@ static void amdgpu_virt_update_vf2pf_work_item(struct 
work_struct *work)
ret = amdgpu_virt_read_pf2vf_data(adev);
if (ret) {
adev->virt.vf2pf_update_retry_cnt++;
-   if ((adev->virt.vf2pf_update_retry_cnt >= 
AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
-   amdgpu_sriov_runtime(adev)) {
+
+   if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
+   adev->virt.vf2pf_update_retry_cnt >= 
AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+   amdgpu_sriov_runtime(adev)) {
+
amdgpu_ras_set_fed(adev, true);
if (amdgpu_reset_domain_schedule(adev->reset_domain,
- 
&adev->kfd.reset_work))
+   &adev->kfd.reset_work))
return;
else
dev_err(adev->dev, "Failed to queue work! at 
%s", __func__);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f04cd1586c7220..b42a8854dca0cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -52,7 +52,7 @@
 /* tonga/fiji use this offset */
 #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
 
-#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
 
 enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
@@ -94,6 +94,7 @@ struct amdgpu_virt_ops {
  u32 data1, u32 data2, u32 data3);
void (*ras_poison_handler)(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
+   bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
 };
 
 /*
@@ -352,6 +353,7 @@ void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
 int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
 void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 65656afc6ed1c2..a2c0a43a02f351 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -196,11 +196,22 @@ static int xgpu_ai_request_reset(struct amdgpu_device 
*adev)
 {
int ret, i = 0;
 
-   while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+   if (amdgpu_ras_get_fed_status(adev)) {
+   dev_dbg(adev->dev, "ras flag is set, poll for 
IDH_FLR_NOTIFICATION_CMPL\n");
+
+   for (i = 0; i < AI_MAILBOX_POLL_MSG_REP_MAX; i++) {
+   ret = xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL);
+   if (!ret)
+   break;
+
+   dev_dbg(adev->dev, "retries left = %d\n", 
AI_MAILBOX_POLL_MSG_REP_MAX - i);
+   }
+   }
+
+   for (i = 0; i < AI_MAILBOX_POLL_MSG_REP_MAX; i++) {
ret = xgpu_a

Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
在 2024-06-17星期一的 16:42 +0200,Christian König写道:
> Am 17.06.24 um 16:30 schrieb Icenowy Zheng:
> > 在 2024-06-17星期一的 15:59 +0200,Christian König写道:
> > > Am 17.06.24 um 15:43 schrieb Icenowy Zheng:
> > > > 在 2024-06-17星期一的 15:09 +0200,Christian König写道:
> > > > > Am 17.06.24 um 15:03 schrieb Icenowy Zheng:
> > > > > > 在 2024-06-17星期一的 14:35 +0200,Christian König写道:
> > > > > > > Am 17.06.24 um 12:58 schrieb Icenowy Zheng:
> > > > > > > > The duplication of EOP packets for GFX7/8, with the
> > > > > > > > former
> > > > > > > > one
> > > > > > > > have
> > > > > > > > seq-1 written and the latter one have seq written,
> > > > > > > > seems to
> > > > > > > > confuse
> > > > > > > > some
> > > > > > > > hardware platform (e.g. Loongson 7A series PCIe
> > > > > > > > controllers).
> > > > > > > > 
> > > > > > > > Make the content of the duplicated EOP packet the same
> > > > > > > > with
> > > > > > > > the
> > > > > > > > real
> > > > > > > > one, only masking any possible interrupts.
> > > > > > > Well completely NAK to that, exactly that disables the
> > > > > > > workaround.
> > > > > > > 
> > > > > > > The CPU needs to see two different values written here.
> > > > > > Why do the CPU need to see two different values here? Only
> > > > > > the
> > > > > > second
> > > > > > packet will raise an interrupt before and after applying
> > > > > > this
> > > > > > patch,
> > > > > > and the first packet's result should just be overriden on
> > > > > > ordinary
> > > > > > platforms. The CPU won't see the first one, until it's
> > > > > > polling
> > > > > > for
> > > > > > the
> > > > > > address for a very short interval, so short that the GPU CP
> > > > > > couldn't
> > > > > > execute 2 commands.
> > > > > Yes exactly that. We need to make two writes, one with the
> > > > > old
> > > > > value
> > > > > (seq - 1) and a second with the real value (seq).
> > > > > 
> > > > > Otherwise it is possible that a polling CPU would see the
> > > > > sequence
> > > > > before the second EOP is issued with results in incoherent
> > > > > view
> > > > > of
> > > > > memory.
> > > > In this case shouldn't we write seq-1 before any work, and then
> > > > write
> > > > seq after work, like what is done in Mesa?
> > > No. This hw workaround requires that two consecutive write
> > > operations
> > > happen directly behind each other on the PCIe bus with two
> > > different
> > > values.
> > Well to be honest the workaround code in Mesa seems to not be
> > working
> > in this way ...
> 
> Mesa doesn't have any workaround for that hw issue, the code there
> uses 
> a quite different approach.

Ah? Commit bf26da927a1c ("drm/amdgpu: add cache flush workaround to
gfx8 emit_fence") says "Both PAL and Mesa use it for gfx8 too, so port
this commit to gfx_v8_0_ring_emit_fence_gfx", so maybe the workaround
should just be not necessary here?


> 
> > > To make the software logic around that work without any changes
> > > we
> > > use
> > > the values seq - 1 and seq because those are guaranteed to be
> > > different
> > > and not trigger any unwanted software behavior.
> > > 
> > > Only then we can guarantee that we have a coherent view of system
> > > memory.
> > Any more details about it?
> 
> No, sorry. All I know is that it's a bug in the cache flush logic
> which 
> can be worked around by issuing two write behind each other to the
> same 
> location.

So the issue is that the first EOP write does not properly flush the
cache? Could EVENT_WRITE be used instead of EVENT_WRITE_EOP in this
workaround to properly flush it without hurting the fence value?


> 
> > BTW in this case, could I try to write it for 3 times instead of 2,
> > with seq-1, seq and seq?
> 
> That could potentially work as well, but at some point we would need
> to 
> increase the EOP ring buffer size or could run into performance
> issues.

Well I will try this. I think the buffer is enlarged in the original
workaround commit.

> 
> > > > As what I see, Mesa uses another command buffer to emit a
> > > > EVENT_WRITE_EOP writing 0, and commit this command buffer
> > > > before
> > > > the
> > > > real command buffer.
> > > > 
> > > > > > Or do you mean the GPU needs to see two different values
> > > > > > being
> > > > > > written,
> > > > > > or they will be merged into only one write request?
> > > > > > 
> > > > > > Please give out more information about this workaround,
> > > > > > otherwise
> > > > > > the
> > > > > > GPU hang problem on Loongson platforms will persist.
> > > > > Well if Loongson can't handle two consecutive write
> > > > > operations to
> > > > > the
> > > > > same address with different values then you have a massive
> > > > > platform
> > > > > bug.
> > > > I think the issue is triggered when two consecutive write
> > > > operations
> > > > and one IRQ is present, which is exactly the case of this
> > > > function.
> > > Well then you have a massive platform bug.
> > > 
> > > Two consecutive writes to the same bus address are perfectly
> > > l

Re: [PATCH v2 8/8] drm/amdgpu: Call drm_atomic_helper_shutdown() at shutdown time

2024-06-17 Thread Alex Deucher
On Wed, Jun 12, 2024 at 6:37 PM Douglas Anderson  wrote:
>
> Based on grepping through the source code this driver appears to be
> missing a call to drm_atomic_helper_shutdown() at system shutdown
> time. Among other things, this means that if a panel is in use that it
> won't be cleanly powered off at system shutdown time.
>
> The fact that we should call drm_atomic_helper_shutdown() in the case
> of OS shutdown/restart comes straight out of the kernel doc "driver
> instance overview" in drm_drv.c.
>
> Suggested-by: Maxime Ripard 
> Cc: Alex Deucher 
> Cc: Christian König 
> Cc: Xinhui Pan 
> Signed-off-by: Douglas Anderson 
> ---
> This commit is only compile-time tested.
>
> ...and further, I'd say that this patch is more of a plea for help
> than a patch I think is actually right. I'm _fairly_ certain that
> drm/amdgpu needs this call at shutdown time but the logic is a bit
> hard for me to follow. I'd appreciate if anyone who actually knows
> what this should look like could illuminate me, or perhaps even just
> post a patch themselves!

I'm not sure whether this patch makes sense or not.  The driver doesn't really
do a formal tear down in its shutdown routine, it just quiesces the
hardware.  What are the actual requirements of the shutdown function?
In the past when we did a full driver tear down in shutdown, it
delayed the shutdown sequence and users complained.

Alex

>
> (no changes since v1)
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  2 ++
>  3 files changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f87d53e183c3..c202a1d5ff5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1197,6 +1197,7 @@ static inline struct amdgpu_device 
> *amdgpu_ttm_adev(struct ttm_device *bdev)
>  int amdgpu_device_init(struct amdgpu_device *adev,
>uint32_t flags);
>  void amdgpu_device_fini_hw(struct amdgpu_device *adev);
> +void amdgpu_device_shutdown_hw(struct amdgpu_device *adev);
>  void amdgpu_device_fini_sw(struct amdgpu_device *adev);
>
>  int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 861ccff78af9..a8c4b8412e04 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4531,6 +4531,16 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>
>  }
>
> +void amdgpu_device_shutdown_hw(struct amdgpu_device *adev)
> +{
> +   if (adev->mode_info.mode_config_initialized) {
> +   if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
> +   drm_helper_force_disable_all(adev_to_drm(adev));
> +   else
> +   drm_atomic_helper_shutdown(adev_to_drm(adev));
> +   }
> +}
> +
>  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>  {
> int idx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index ea14f1c8f430..b34bf9259d5c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2409,6 +2409,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
> struct drm_device *dev = pci_get_drvdata(pdev);
> struct amdgpu_device *adev = drm_to_adev(dev);
>
> +   amdgpu_device_shutdown_hw(adev);
> +
> if (amdgpu_ras_intr_triggered())
> return;
>
> --
> 2.45.2.505.gda0bf45e8d-goog
>


Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 16:57 schrieb Icenowy Zheng:

在 2024-06-17星期一的 16:42 +0200,Christian König写道:

Am 17.06.24 um 16:30 schrieb Icenowy Zheng:

在 2024-06-17星期一的 15:59 +0200,Christian König写道:

Am 17.06.24 um 15:43 schrieb Icenowy Zheng:

在 2024-06-17星期一的 15:09 +0200,Christian König写道:

...

In this case shouldn't we write seq-1 before any work, and then
write
seq after work, like what is done in Mesa?

No. This hw workaround requires that two consecutive write
operations
happen directly behind each other on the PCIe bus with two
different
values.

Well to be honest the workaround code in Mesa seems to not be
working
in this way ...

Mesa doesn't have any workaround for that hw issue, the code there
uses
a quite different approach.

Ah? Commit bf26da927a1c ("drm/amdgpu: add cache flush workaround to
gfx8 emit_fence") says "Both PAL and Mesa use it for gfx8 too, so port
this commit to gfx_v8_0_ring_emit_fence_gfx", so maybe the workaround
should just be not necessary here?


What I meant was that Mesa doesn't have a hack like writing seq - 1 and 
then seq.


I haven't checked the code, but it uses a different approach with 64bit 
values as far as I know.



To make the software logic around that work without any changes
we
use
the values seq - 1 and seq because those are guaranteed to be
different
and not trigger any unwanted software behavior.

Only then we can guarantee that we have a coherent view of system
memory.

Any more details about it?

No, sorry. All I know is that it's a bug in the cache flush logic
which
can be worked around by issuing two write behind each other to the
same
location.

So the issue is that the first EOP write does not properly flush the
cache? Could EVENT_WRITE be used instead of EVENT_WRITE_EOP in this
workaround to properly flush it without hurting the fence value?


No, EVENT_WRITE is executed at a different time in the pipeline.


...

Well to be honest on a platform where even two consecutive writes to
the
same location doesn't work I would have strong doubts that it is
stable
in general.

Well I think the current situation is that the IRQ triggered by the
second EOP packet arrives before the second write is finished, not the
second write is totally dropped.


Well that sounds like the usual re-ordering problems we have seen 
patches for on Loongson multiple times now.


And I can only repeat what I've wrote before: We don't accept 
workarounds in drivers for problems cause by severely platform issues.


Especially when that is clearly against any PCIe specification.

Regards,
Christian.




Regards,
Christian.


Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 17:35 schrieb Xi Ruoyao:

On Mon, 2024-06-17 at 22:30 +0800, Icenowy Zheng wrote:

Two consecutive writes to the same bus address are perfectly legal
from
the PCIe specification and can happen all the time, even without this
specific hw workaround.

Yes I know it, and I am not from Loongson, just some user trying to
mess around it.

There are some purposed "workarounds" like reducing the link speed (from
x16 to x8), tweaking the power management setting, etc.  Someone even
claims improving the heat sink of the LS7A chip can help to work around
this issue but I'm really skeptical...


Well when it's an ordering problem between writes and interrupts then 
nothing else than getting the order right will fix this. Otherwise it 
can always be that the CPU doesn't see coherent results from PCIe devices.


In other words if the CPU gets an interrupt but doesn't sees the fence 
value written it will assume the work is not done. But since the 
hardware won't trigger a second interrupt the CPU will then keep waiting 
for the operation to finish forever.


This is not limited to GPUs, but will potentially happen with network or 
disk I/O as well.


Regards,
Christian.


Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Icenowy Zheng
在 2024-06-17星期一的 15:59 +0200,Christian König写道:
> Am 17.06.24 um 15:43 schrieb Icenowy Zheng:
> > 在 2024-06-17星期一的 15:09 +0200,Christian König写道:
> > > Am 17.06.24 um 15:03 schrieb Icenowy Zheng:
> > > > 在 2024-06-17星期一的 14:35 +0200,Christian König写道:
> > > > > Am 17.06.24 um 12:58 schrieb Icenowy Zheng:
> > > > > > The duplication of EOP packets for GFX7/8, with the former
> > > > > > one
> > > > > > have
> > > > > > seq-1 written and the latter one have seq written, seems to
> > > > > > confuse
> > > > > > some
> > > > > > hardware platform (e.g. Loongson 7A series PCIe
> > > > > > controllers).
> > > > > > 
> > > > > > Make the content of the duplicated EOP packet the same with
> > > > > > the
> > > > > > real
> > > > > > one, only masking any possible interrupts.
> > > > > Well completely NAK to that, exactly that disables the
> > > > > workaround.
> > > > > 
> > > > > The CPU needs to see two different values written here.
> > > > Why do the CPU need to see two different values here? Only the
> > > > second
> > > > packet will raise an interrupt before and after applying this
> > > > patch,
> > > > and the first packet's result should just be overriden on
> > > > ordinary
> > > > platforms. The CPU won't see the first one, until it's polling
> > > > for
> > > > the
> > > > address for a very short interval, so short that the GPU CP
> > > > couldn't
> > > > execute 2 commands.
> > > Yes exactly that. We need to make two writes, one with the old
> > > value
> > > (seq - 1) and a second with the real value (seq).
> > > 
> > > Otherwise it is possible that a polling CPU would see the
> > > sequence
> > > before the second EOP is issued with results in incoherent view
> > > of
> > > memory.
> > In this case shouldn't we write seq-1 before any work, and then
> > write
> > seq after work, like what is done in Mesa?
> 
> No. This hw workaround requires that two consecutive write operations
> happen directly behind each other on the PCIe bus with two different
> values.
> 
> To make the software logic around that work without any changes we
> use 
> the values seq - 1 and seq because those are guaranteed to be
> different 
> and not trigger any unwanted software behavior.
> 
> Only then we can guarantee that we have a coherent view of system
> memory.

BTW is there any operation that could be taken to examine this specific
workaround?

Is there any case possible to reproduce?

> 
> > As what I see, Mesa uses another command buffer to emit a
> > EVENT_WRITE_EOP writing 0, and commit this command buffer before
> > the
> > real command buffer.
> > 
> > > > Or do you mean the GPU needs to see two different values being
> > > > written,
> > > > or they will be merged into only one write request?
> > > > 
> > > > Please give out more information about this workaround,
> > > > otherwise
> > > > the
> > > > GPU hang problem on Loongson platforms will persist.
> > > Well if Loongson can't handle two consecutive write operations to
> > > the
> > > same address with different values then you have a massive
> > > platform
> > > bug.
> > I think the issue is triggered when two consecutive write
> > operations
> > and one IRQ is present, which is exactly the case of this function.
> 
> Well then you have a massive platform bug.
> 
> Two consecutive writes to the same bus address are perfectly legal
> from 
> the PCIe specification and can happen all the time, even without this
> specific hw workaround.
> 
> Regards,
> Christian.
> 
> > 
> > > That is something which can happen all the time throughout the
> > > operation.
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > > Fixes: bf26da927a1c ("drm/amdgpu: add cache flush
> > > > > > workaround to
> > > > > > gfx8 emit_fence")
> > > > > > Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK
> > > > > > parts")
> > > > > > Signed-off-by: Icenowy Zheng 
> > > > > > ---
> > > > > >     drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 12 +---
> > > > > >     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 
> > > > > >     2 files changed, 9 insertions(+), 15 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > index 541dbd70d8c75..778f27f1a34fe 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> > > > > > @@ -2117,9 +2117,8 @@ static void
> > > > > > gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64
> > > > > > addr,
> > > > > >     {
> > > > > >   bool write64bit = flags &
> > > > > > AMDGPU_FENCE_FLAG_64BIT;
> > > > > >   bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
> > > > > > -   /* Workaround for cache flush problems. First send
> > > > > > a
> > > > > > dummy
> > > > > > EOP
> > > > > > -    * event down the pipe with seq one below.
> > > > > > -    */
> > > > > > +
> > > > > > +   /* Workaround for cac

[PATCH] drm/amdgpu: init TA fw for psp v14

2024-06-17 Thread Aurabindo Pillai
From: Likun Gao 

Add support to init TA firmware for psp v14.

Signed-off-by: Likun Gao 
---
 drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
index cc0248efa6b6..4d33c95a5116 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -32,7 +32,9 @@
 #include "mp/mp_14_0_2_sh_mask.h"
 
 MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
 
 /* For large FW files the time to complete can be very long */
 #define USBC_PD_POLLING_LIMIT_S 240
@@ -64,6 +66,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
case IP_VERSION(14, 0, 2):
case IP_VERSION(14, 0, 3):
err = psp_init_sos_microcode(psp, ucode_prefix);
+   if (err)
+   return err;
+   err = psp_init_ta_microcode(psp, ucode_prefix);
if (err)
return err;
break;
-- 
2.45.2



Re: [PATCH] drm/amdgpu: init TA fw for psp v14

2024-06-17 Thread Alex Deucher
On Mon, Jun 17, 2024 at 3:14 PM Aurabindo Pillai
 wrote:
>
> From: Likun Gao 
>
> Add support to init TA firmware for psp v14.
>
> Signed-off-by: Likun Gao 

Acked-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
> index cc0248efa6b6..4d33c95a5116 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
> @@ -32,7 +32,9 @@
>  #include "mp/mp_14_0_2_sh_mask.h"
>
>  MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
> +MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
>  MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
> +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
>
>  /* For large FW files the time to complete can be very long */
>  #define USBC_PD_POLLING_LIMIT_S 240
> @@ -64,6 +66,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
> case IP_VERSION(14, 0, 2):
> case IP_VERSION(14, 0, 3):
> err = psp_init_sos_microcode(psp, ucode_prefix);
> +   if (err)
> +   return err;
> +   err = psp_init_ta_microcode(psp, ucode_prefix);
> if (err)
> return err;
> break;
> --
> 2.45.2
>


[PATCH 1/2] drm/amd: Add reg definitions for DCN401 DCC

2024-06-17 Thread Aurabindo Pillai
Add the necessary register definitions to enable DCC on DCN4x

Signed-off-by: Aurabindo Pillai 
---
 .../include/asic_reg/dcn/dcn_4_1_0_sh_mask.h  | 110 ++
 1 file changed, 110 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h
index 0c68f5d818bb..f42a276499cd 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h
@@ -6430,6 +6430,28 @@
 //DCHUBBUB_SDPIF_MEM_PWR_STATUS
 #define DCHUBBUB_SDPIF_MEM_PWR_STATUS__DCHUBBUB_SDPIF_MEM_PWR_STATE__SHIFT 
   0x0
 #define DCHUBBUB_SDPIF_MEM_PWR_STATUS__DCHUBBUB_SDPIF_MEM_PWR_STATE_MASK   
   0x0003L
+//DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE0__SHIFT
 0x0
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE1__SHIFT
 0x1
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE2__SHIFT
 0x2
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE3__SHIFT
 0x3
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE0__SHIFT
0xc
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE1__SHIFT
0xd
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE2__SHIFT
0xe
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE3__SHIFT
0xf
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_ENABLE__SHIFT
 0x1c
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_VREADY_MODE__SHIFT
0x1f
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE0_MASK
   0x0001L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE1_MASK
   0x0002L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE2_MASK
   0x0004L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_FLIP_AWAY_MISSING_PIPE3_MASK
   0x0008L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE0_MASK
  0x1000L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE1_MASK
  0x2000L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE2_MASK
  0x4000L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_CLEAR_PIPE3_MASK
  0x8000L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_ERR_ENABLE_MASK
   0x1000L
+#define 
DCHUBBUB_SDPIF_MCACHE_INVALIDATION_CTL__DCHUBBUB_SDPIF_MCACHEID_INV_VREADY_MODE_MASK
  0x8000L
+
 
 
 // addressBlock: dcn_dcec_dchubbubl_hubbub_ret_path_dispdec
@@ -7084,6 +7106,11 @@
 #define HUBP0_DCSURF_PRI_VIEWPORT_START__PRI_VIEWPORT_Y_START__SHIFT   
   0x10
 #define HUBP0_DCSURF_PRI_VIEWPORT_START__PRI_VIEWPORT_X_START_MASK 
   0xL
 #define HUBP0_DCSURF_PRI_VIEWPORT_START__PRI_VIEWPORT_Y_START_MASK 
   0xL
+//HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE
+#define 
HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE__VIEWPORT_MCACHE_SPLIT_COORDINATE__SHIFT
0x0
+#define 
HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE__VIEWPORT_MCACHE_SPLIT_COORDINATE_C__SHIFT
  0x10
+#define 
HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE__VIEWPORT_MCACHE_SPLIT_COORDINATE_MASK
  0xL
+#define 
HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE__VIEWPORT_MCACHE_SPLIT_COORDINATE_C_MASK
0xL
 //HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION
 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 
   0x0
 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT
   0x10
@@ -7244,6 +7271,23 @@
 #define HUBP0_DCHUBP_MALL_SUB_VP2__SUB_VP_HEIGHT_NEXT_S1__SHIFT
   0xc
 #define HUBP0_DCHUBP_MALL_SUB_VP2__SUB_VP_HEIGHT_CURR_S1_MASK  
   0x0FFFL
 #define HUBP0_DCHUBP_MALL_SUB_VP2__SUB_VP_HEIGHT_NEXT_S1_MASK  
   0x00FFF000L
+//HUBP0_DCHUBP_MCACHEID_CONFIG
+#define HUBP0_DCHUBP_MCACHEID_CONFIG__

[PATCH 2/2] drm/amd/display: Enable DCC on DCN401

2024-06-17 Thread Aurabindo Pillai
Add registers and entry points to enable DCC on DCN4x

Signed-off-by: Aurabindo Pillai 
---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  10 +
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |  11 +
 drivers/gpu/drm/amd/display/dc/dc.h   |   4 +
 .../drm/amd/display/dc/dml2/dml2_wrapper.c|   6 +
 .../drm/amd/display/dc/dml2/dml2_wrapper.h|   2 +-
 .../display/dc/hubbub/dcn30/dcn30_hubbub.c|   3 +
 .../display/dc/hubbub/dcn31/dcn31_hubbub.c|   3 +
 .../display/dc/hubbub/dcn401/dcn401_hubbub.c  | 280 ++
 .../display/dc/hubbub/dcn401/dcn401_hubbub.h  |   5 +
 .../amd/display/dc/hubp/dcn20/dcn20_hubp.h|  14 +
 .../amd/display/dc/hubp/dcn401/dcn401_hubp.c  |  21 ++
 .../amd/display/dc/hubp/dcn401/dcn401_hubp.h  |  14 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   |   4 +
 .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c |  25 ++
 .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h |   2 +
 .../amd/display/dc/hwss/dcn401/dcn401_init.c  |   1 +
 .../drm/amd/display/dc/hwss/hw_sequencer.h|   9 +
 .../gpu/drm/amd/display/dc/inc/core_types.h   |   3 +
 .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h  |   4 +
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h  |   1 +
 .../dc/resource/dcn401/dcn401_resource.c  |   9 +
 .../dc/resource/dcn401/dcn401_resource.h  |   2 +
 22 files changed, 431 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c 
b/drivers/gpu/drm/amd/display/dc/core/dc.c
index a4ba6f99cd34..8c691bee49a3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1264,6 +1264,9 @@ static void disable_dangling_plane(struct dc *dc, struct 
dc_state *context)
apply_ctx_interdependent_lock(dc, 
dc->current_state, old_stream, false);
dc->hwss.post_unlock_program_front_end(dc, 
dangling_context);
}
+
+   if (dc->res_pool->funcs->prepare_mcache_programming)
+   
dc->res_pool->funcs->prepare_mcache_programming(dc, dangling_context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, 
dc->current_state, true);
dc->hwss.program_front_end_for_ctx(dc, 
dangling_context);
@@ -2037,6 +2040,8 @@ static enum dc_status dc_commit_state_no_check(struct dc 
*dc, struct dc_state *c
}
 
/* Program all planes within new context*/
+   if (dc->res_pool->funcs->prepare_mcache_programming)
+   dc->res_pool->funcs->prepare_mcache_programming(dc, context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, context, true);
dc->hwss.program_front_end_for_ctx(dc, context);
@@ -3884,6 +3889,7 @@ static void commit_planes_for_stream(struct dc *dc,
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
 
+
if ((update_type != UPDATE_TYPE_FAST) && 
stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program &&

top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
@@ -3903,6 +3909,10 @@ static void commit_planes_for_stream(struct dc *dc,

top_pipe_to_program->stream_res.tg);
}
 
+   if (dc->hwss.wait_for_dcc_meta_propagation) {
+   dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program);
+   }
+
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, true, 
should_lock_all_pipes, NULL, subvp_prev_use);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 5037474bf95c..87e36d51c56d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -595,6 +595,12 @@ void hwss_build_fast_sequence(struct dc *dc,
if (!plane || !stream)
return;
 
+   if (dc->hwss.wait_for_dcc_meta_propagation) {
+   
block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc;
+   
block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program
 = pipe_ctx;
+   block_sequence[*num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP;
+   (*num_steps)++;
+   }
if (dc->hwss.subvp_pipe_control_lock_fast) {

block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;

block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = 
true;
@@ -835,6 +841,11 @@ void hwss_execute_sequence(struct dc *dc,
case DMUB_SUBVP_SAVE_SURF_ADD

RE: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov

2024-06-17 Thread Jian, Jane
[AMD Official Use Only - AMD Internal Distribution Only]

Ping on this...
Thanks,
Jane

-Original Message-
From: Jane Jian 
Sent: Monday, June 17, 2024 6:11 PM
To: Lazar, Lijo ; Chang, HaiJun ; 
Zhao, Victor 
Cc: amd-gfx@lists.freedesktop.org; Jian, Jane 
Subject: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write 
under sriov

[WHY]
sriov has the higher bit violation when flushing tlb

[HOW]
normalize the registers to keep lower 16-bit (dword aligned) to avoid higher bit 
violation. RLCG will mask xcd out and always assume it's accessing its own xcd

also fix the typo in sriov_w/rreg:
for KIQ case, use xcc with xcc_id to read and write

v2
amend some typos

Signed-off-by: Jane Jian 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 ++--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 ++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 63f2286858c4..d43652a38484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1075,6 +1075,10 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return;

+   /* Select lower 16 bits to write in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
true, &rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, 
xcc_id); @@ -1084,7 +1088,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device 
*adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, value);
else
-   WREG32(offset, value);
+   WREG32_XCC(offset, value, xcc_id);
 }

 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, @@ -1095,6 +1099,10 @@ u32 
amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return 0;

+   /* Select lower 16 bits to read in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
false, &rlcg_flag))
return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, 
xcc_id); @@ -1102,7 +1110,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
else
-   return RREG32(offset);
+   return RREG32_XCC(offset, xcc_id);
 }

 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88b4644f8e96..5bb275b96e6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
-   uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-   uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+   /* Select lower 16 bits to write in local xcc */
+   if (AMDGPU_IS_GFXHUB(vmhub)) {
+   req = NORMALIZE_XCC_REG_OFFSET(req);
+   ack = NORMALIZE_XCC_REG_OFFSET(ack);
+   }

amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
 1 << vmid, inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..9ddf68e7d06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -210,4 +210,6 @@
 #define WREG64_MCA(ext, mca_base, idx, val) \
WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + 
mca_base + (idx * 8), val)

+#define NORMALIZE_XCC_REG_OFFSET(offset) (offset & 0xFFFF)
+
 #endif
--
2.34.1



RE: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov

2024-06-17 Thread Ma, Qing (Mark)
[AMD Official Use Only - AMD Internal Distribution Only]

@Lazar, Lijo Can you help to review the code?
Thanks

-Original Message-
From: Jian, Jane 
Sent: Tuesday, June 18, 2024 10:37 AM
To: Jian, Jane ; Lazar, Lijo ; Chang, 
HaiJun ; Zhao, Victor 
Cc: amd-gfx@lists.freedesktop.org; Ma, Qing (Mark) 
Subject: RE: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write 
under sriov

[AMD Official Use Only - AMD Internal Distribution Only]

Ping on this...
Thanks,
Jane

-Original Message-
From: Jane Jian 
Sent: Monday, June 17, 2024 6:11 PM
To: Lazar, Lijo ; Chang, HaiJun ; 
Zhao, Victor 
Cc: amd-gfx@lists.freedesktop.org; Jian, Jane 
Subject: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write 
under sriov

[WHY]
sriov has the higher bit violation when flushing tlb

[HOW]
normalize the registers to keep lower 16-bit (dword aligned) to avoid higher bit 
violation. RLCG will mask xcd out and always assume it's accessing its own xcd

also fix the typo in sriov_w/rreg:
for KIQ case, use xcc with xcc_id to read and write

v2
amend some typos

Signed-off-by: Jane Jian 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  | 12 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  8 ++--
 drivers/gpu/drm/amd/amdgpu/soc15_common.h |  2 ++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 63f2286858c4..d43652a38484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1075,6 +1075,10 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return;

+   /* Select lower 16 bits to write in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
true, &rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, 
xcc_id); @@ -1084,7 +1088,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device 
*adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
WREG32_NO_KIQ(offset, value);
else
-   WREG32(offset, value);
+   WREG32_XCC(offset, value, xcc_id);
 }

 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, @@ -1095,6 +1099,10 @@ u32 
amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (amdgpu_device_skip_hw_access(adev))
return 0;

+   /* Select lower 16 bits to read in local xcc */
+   if ((hwip == GC_HWIP) && !(acc_flags & AMDGPU_REGS_NO_KIQ))
+   offset = NORMALIZE_XCC_REG_OFFSET(offset);
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, 
false, &rlcg_flag))
return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, 
xcc_id); @@ -1102,7 +1110,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
if (acc_flags & AMDGPU_REGS_NO_KIQ)
return RREG32_NO_KIQ(offset);
else
-   return RREG32(offset);
+   return RREG32_XCC(offset, xcc_id);
 }

 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88b4644f8e96..5bb275b96e6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 */
if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
-   uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
-   uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+   /* Select lower 16 bits to write in local xcc */
+   if (AMDGPU_IS_GFXHUB(vmhub)) {
+   req = NORMALIZE_XCC_REG_OFFSET(req);
+   ack = NORMALIZE_XCC_REG_OFFSET(ack);
+   }

amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
 1 << vmid, inst); diff --git 
a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..9ddf68e7d06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -210,4 +210,6 @@
 #define WREG64_MCA(ext, mca_base, idx, val) \
WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + 
mca_base + (idx * 8), val)

+#define NORMALIZE_XCC_REG_OFFSET(offset) (offset & 0xFFFF)
+
 #endif
--
2.34.1




Re: [PATCH] drm/amdgpu: avoid using null object of framebuffer

2024-06-17 Thread Huang Rui
On Fri, Jun 07, 2024 at 03:04:55PM +0800, Zhang, Julia wrote:
> Instead of using state->fb->obj[0] directly, get object from framebuffer
> by calling drm_gem_fb_get_obj() and return error code when object is
> null to avoid using null object of framebuffer.
> 
> Signed-off-by: Julia Zhang 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 17 +++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
> index d60c4a2eeb0c..d50ee0828935 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
> @@ -2,6 +2,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "amdgpu.h"
> @@ -311,7 +312,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane 
> *plane,
>   return 0;
>   }
>   afb = to_amdgpu_framebuffer(new_state->fb);
> - obj = new_state->fb->obj[0];
> +
> + obj = drm_gem_fb_get_obj(new_state->fb, 0);
> + if (!obj) {
> + DRM_ERROR("Failed to get obj from framebuffer\n");
> + return -EINVAL;
> + }
> +
>   rbo = gem_to_amdgpu_bo(obj);
>   adev = amdgpu_ttm_adev(rbo->tbo.bdev);
>  
> @@ -365,12 +372,18 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane 
> *plane,
>  struct drm_plane_state *old_state)
>  {
>   struct amdgpu_bo *rbo;
> + struct drm_gem_object *obj = drm_gem_fb_get_obj(old_state->fb, 0);
>   int r;
>  
>   if (!old_state->fb)
>   return;
>  
> - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
> + if (!obj) {
> + DRM_ERROR("Failed to get obj from framebuffer\n");
> + return;
> + }
> +
> + rbo = gem_to_amdgpu_bo(obj);
>   r = amdgpu_bo_reserve(rbo, false);
>   if (unlikely(r)) {
>   DRM_ERROR("failed to reserve rbo before unpin\n");
> -- 
> 2.34.1
> 


[PATCH v7 0/9] drm: Support per-plane async flip configuration

2024-06-17 Thread André Almeida
AMD hardware can do async flips with overlay planes, but currently there's no
easy way to enable that in DRM. To solve that, this patchset creates a new
drm_plane field, bool async_flip, that allows drivers to choose which plane can
or cannot do async flips. This is latter used on drm_atomic_set_property when
users want to do async flips.

Patch 1 allows async commits with IN_FENCE_ID in any driver.

Patches 2 to 7 have no functional change. As per current code, every driver that
allows async page flips using the atomic API, allows doing it only in the
primary plane. Those patches then enable it for every driver.

Every driver that I found out capable of doing async flips were changed here.
The drivers that weren't touch don't have any mention to
mode_config::async_page_flip, so they are not currently advertising to userspace
that they can do async flips.

Patch 8 changes the current DRM uAPI check from allowing only primary planes to
allowing any plane that the driver allows flipping asynchronously.

Patch 9 finally enables async flip on overlay planes for amdgpu.

Changes from v6:
- Added async_flip check for i915/skl planes (Rodrigo)
- Commit the plane->async_flip check just after all driver had set their
async_flip capabilities (Dmitry)
https://lore.kernel.org/dri-devel/20240614153535.351689-1-andrealm...@igalia.com/

Changes from v5:
- Instead of enabling plane->async_flip in the common code, move it to driver
code.
- Enable primary plane async flip on every driver
https://lore.kernel.org/dri-devel/20240612193713.167448-1-andrealm...@igalia.com/

André Almeida (9):
  drm/atomic: Allow userspace to use explicit sync with atomic async
flips
  drm: Support per-plane async flip configuration
  drm/amdgpu: Enable async flips on the primary plane
  drm: atmel-hlcdc: Enable async flips on the primary plane
  drm/i915: Enable async flips on the primary plane
  drm/nouveau: Enable async flips on the primary plane
  drm/vc4: Enable async flips on the primary plane
  drm: Enable per-plane async flip check
  drm/amdgpu: Make it possible to async flip overlay planes

 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 ++
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 3 +++
 drivers/gpu/drm/drm_atomic_uapi.c   | 8 +---
 drivers/gpu/drm/i915/display/i9xx_plane.c   | 3 +++
 drivers/gpu/drm/i915/display/skl_universal_plane.c  | 1 +
 drivers/gpu/drm/nouveau/dispnv04/crtc.c | 4 
 drivers/gpu/drm/nouveau/dispnv50/wndw.c | 4 
 drivers/gpu/drm/vc4/vc4_plane.c | 4 +++-
 include/drm/drm_plane.h | 5 +
 9 files changed, 30 insertions(+), 4 deletions(-)

-- 
2.45.2



[PATCH v7 1/9] drm/atomic: Allow userspace to use explicit sync with atomic async flips

2024-06-17 Thread André Almeida
Allow userspace to use explicit synchronization with atomic async flips.
That means that the flip will wait for some hardware fence, and then
will flip as soon as possible (async) in regard of the vblank.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 22bbb2d83e30..2e1d9391febe 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1070,7 +1070,9 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
break;
}
 
-   if (async_flip && prop != config->prop_fb_id) {
+   if (async_flip &&
+   prop != config->prop_fb_id &&
+   prop != config->prop_in_fence_fd) {
ret = drm_atomic_plane_get_property(plane, plane_state,
prop, &old_val);
ret = drm_atomic_check_prop_changes(ret, old_val, 
prop_value, prop);
-- 
2.45.2



[PATCH v7 2/9] drm: Support per-plane async flip configuration

2024-06-17 Thread André Almeida
Drivers have different capabilities on what plane types they can or
cannot perform async flips. Create a plane::async_flip field so each
driver can choose which planes they allow doing async flips.

Signed-off-by: André Almeida 
---
 include/drm/drm_plane.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 9507542121fa..0bebc72af5c3 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -786,6 +786,11 @@ struct drm_plane {
 * @kmsg_panic: Used to register a panic notifier for this plane
 */
struct kmsg_dumper kmsg_panic;
+
+   /**
+* @async_flip: indicates if a plane can do async flips
+*/
+   bool async_flip;
 };
 
 #define obj_to_plane(x) container_of(x, struct drm_plane, base)
-- 
2.45.2



[PATCH v7 4/9] drm: atmel-hlcdc: Enable async flips on the primary plane

2024-06-17 Thread André Almeida
This driver can perform async flips on primary planes, so enable it.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c 
b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 4a7ba0918eca..22b8a5c888ef 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -1227,6 +1227,9 @@ static int atmel_hlcdc_plane_create(struct drm_device 
*dev,
if (ret)
return ret;
 
+   if (type == DRM_PLANE_TYPE_PRIMARY)
+   plane->base.async_flip = true;
+
drm_plane_helper_add(&plane->base,
 &atmel_hlcdc_layer_plane_helper_funcs);
 
-- 
2.45.2



[PATCH v7 3/9] drm/amdgpu: Enable async flips on the primary plane

2024-06-17 Thread André Almeida
This driver can perform async flips on primary planes, so enable it.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 8a4c40b4c27e..0c126c5609d3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1705,6 +1705,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
*dm,
 
if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
drm_plane_create_zpos_immutable_property(plane, 0);
+   plane->async_flip = true;
} else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
unsigned int zpos = 1 + drm_plane_index(plane);
drm_plane_create_zpos_property(plane, zpos, 1, 254);
-- 
2.45.2



[PATCH v7 5/9] drm/i915: Enable async flips on the primary plane

2024-06-17 Thread André Almeida
This driver can perform async flips on primary planes, so enable it.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/i915/display/i9xx_plane.c  | 3 +++
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c 
b/drivers/gpu/drm/i915/display/i9xx_plane.c
index 0279c8aabdd1..0142beef20dc 100644
--- a/drivers/gpu/drm/i915/display/i9xx_plane.c
+++ b/drivers/gpu/drm/i915/display/i9xx_plane.c
@@ -931,6 +931,9 @@ intel_primary_plane_create(struct drm_i915_private 
*dev_priv, enum pipe pipe)
 
intel_plane_helper_add(plane);
 
+   if (plane->async_flip)
+   plane->base.async_flip = true;
+
return plane;
 
 fail:
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c 
b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 860574d04f88..8d0a9f69709a 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2371,6 +2371,7 @@ skl_universal_plane_create(struct drm_i915_private 
*dev_priv,
plane->async_flip = skl_plane_async_flip;
plane->enable_flip_done = skl_plane_enable_flip_done;
plane->disable_flip_done = skl_plane_disable_flip_done;
+   plane->base.async_flip = true;
}
 
if (DISPLAY_VER(dev_priv) >= 11)
-- 
2.45.2



[PATCH v7 6/9] drm/nouveau: Enable async flips on the primary plane

2024-06-17 Thread André Almeida
This driver can perform async flips on primary planes, so enable it.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/nouveau/dispnv04/crtc.c | 4 
 drivers/gpu/drm/nouveau/dispnv50/wndw.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 4310ad71870b..fd06d46d49ec 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1285,6 +1285,7 @@ int
 nv04_crtc_create(struct drm_device *dev, int crtc_num)
 {
struct nouveau_display *disp = nouveau_display(dev);
+   struct nouveau_drm *drm = nouveau_drm(dev);
struct nouveau_crtc *nv_crtc;
struct drm_plane *primary;
int ret;
@@ -1338,6 +1339,9 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
if (ret)
return ret;
 
+   if (drm->client.device.info.chipset >= 0x11)
+   primary->async_flip = true;
+
return nvif_head_vblank_event_ctor(&nv_crtc->head, "kmsVbl", 
nv04_crtc_vblank_handler,
   false, &nv_crtc->vblank);
 }
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c 
b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 7a2cceaee6e9..55db0fdf61e7 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -763,6 +763,10 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct 
drm_device *dev,
return ret;
}
 
+   if (type == DRM_PLANE_TYPE_PRIMARY &&
+   drm->client.device.info.chipset >= 0x11)
+   wndw->plane.async_flip = true;
+
return 0;
 }
 
-- 
2.45.2



[PATCH v7 7/9] drm/vc4: Enable async flips on the primary plane

2024-06-17 Thread André Almeida
This driver can perform async flips on primary planes, so enable it.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/vc4/vc4_plane.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 07caf2a47c6c..e3d41da14e6f 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -1672,8 +1672,10 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
  DRM_COLOR_YCBCR_BT709,
  DRM_COLOR_YCBCR_LIMITED_RANGE);
 
-   if (type == DRM_PLANE_TYPE_PRIMARY)
+   if (type == DRM_PLANE_TYPE_PRIMARY) {
drm_plane_create_zpos_immutable_property(plane, 0);
+   plane->async_flip = true;
+   }
 
return plane;
 }
-- 
2.45.2



[PATCH v7 8/9] drm: Enable per-plane async flip check

2024-06-17 Thread André Almeida
Replace the generic "is this plane primary" for a plane::async_flip
check, so DRM follows the plane restrictions set by the driver.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/drm_atomic_uapi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c 
b/drivers/gpu/drm/drm_atomic_uapi.c
index 2e1d9391febe..ed1af3455477 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1079,9 +1079,9 @@ int drm_atomic_set_property(struct drm_atomic_state 
*state,
break;
}
 
-   if (async_flip && plane_state->plane->type != 
DRM_PLANE_TYPE_PRIMARY) {
+   if (async_flip && !plane->async_flip) {
drm_dbg_atomic(prop->dev,
-  "[OBJECT:%d] Only primary planes can be 
changed during async flip\n",
+  "[PLANE:%d] does not support async 
flips\n",
   obj->id);
ret = -EINVAL;
break;
-- 
2.45.2



[PATCH v7 9/9] drm/amdgpu: Make it possible to async flip overlay planes

2024-06-17 Thread André Almeida
amdgpu can handle async flips on overlay planes, so mark it as true
during the plane initialization.

Signed-off-by: André Almeida 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 0c126c5609d3..7d508d816f0d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1709,6 +1709,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager 
*dm,
} else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
unsigned int zpos = 1 + drm_plane_index(plane);
drm_plane_create_zpos_property(plane, zpos, 1, 254);
+   plane->async_flip = true;
} else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
drm_plane_create_zpos_immutable_property(plane, 255);
}
-- 
2.45.2



Re: [PATCH 1/2] drm/amdgpu: make duplicated EOP packet for GFX7/8 have real content

2024-06-17 Thread Christian König

Am 17.06.24 um 18:09 schrieb Icenowy Zheng:

BTW is there any operation that could be taken to examine this specific
workaround?

Is there any case possible to reproduce?


No idea, I mean that's for GFX7/8 which was released between 2013 and 2017.

My educated guess is that you could create a test where you write 
something to VRAM or GTT with a shader and than in the interrupt handler 
try to observer if you can see those values with the CPU.


If those values aren't visible with the CPU then you have either a cache 
flushing issue or your CPU and/or PCIe root complex is incorrectly 
reordering writes and interrupts.


Regards,
Christian.

[PATCH 1/5] drm/amdgpu: add variable to record the deferred error number read by driver

2024-06-17 Thread YiPeng Chai
Add variable to record the deferred error
number read by driver.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 62 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 +-
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c  |  4 +-
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d0dcd3d37e6d..ec8e848122f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -120,7 +120,7 @@ const char *get_ras_block_str(struct ras_common_if 
*ras_block)
 /* typical ECC bad page rate is 1 bad page per 100MB VRAM */
 #define RAS_BAD_PAGE_COVER  (100 * 1024 * 1024ULL)
 
-#define MAX_UMC_POISON_POLLING_TIME_ASYNC  100  //ms
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC  300  //ms
 
 #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100  //ms
 
@@ -2773,7 +2773,8 @@ static void amdgpu_ras_ecc_log_init(struct 
ras_ecc_log_info *ecc_log)
memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key));
 
INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
-   ecc_log->de_updated = false;
+   ecc_log->de_queried_count = 0;
+   ecc_log->prev_de_queried_count = 0;
 }
 
 static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
@@ -2792,7 +2793,8 @@ static void amdgpu_ras_ecc_log_fini(struct 
ras_ecc_log_info *ecc_log)
mutex_unlock(&ecc_log->lock);
 
mutex_destroy(&ecc_log->lock);
-   ecc_log->de_updated = false;
+   ecc_log->de_queried_count = 0;
+   ecc_log->prev_de_queried_count = 0;
 }
 #endif
 
@@ -2820,40 +2822,64 @@ static void amdgpu_ras_do_page_retirement(struct 
work_struct *work)
mutex_unlock(&con->umc_ecc_log.lock);
 }
 
-static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
-   uint32_t timeout_ms)
+static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+   uint32_t poison_creation_count)
 {
int ret = 0;
struct ras_ecc_log_info *ecc_log;
struct ras_query_if info;
-   uint32_t timeout = timeout_ms;
+   uint32_t timeout = 0;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+   uint64_t de_queried_count;
+   uint32_t new_detect_count, total_detect_count;
+   uint32_t need_query_count = poison_creation_count;
+   bool query_data_timeout = false;
 
memset(&info, 0, sizeof(info));
info.head.block = AMDGPU_RAS_BLOCK__UMC;
 
ecc_log = &ras->umc_ecc_log;
-   ecc_log->de_updated = false;
+   total_detect_count = 0;
do {
ret = amdgpu_ras_query_error_status(adev, &info);
-   if (ret) {
-   dev_err(adev->dev, "Failed to query ras error! 
ret:%d\n", ret);
-   return;
+   if (ret)
+   return ret;
+
+   de_queried_count = ecc_log->de_queried_count;
+   if (de_queried_count > ecc_log->prev_de_queried_count) {
+   new_detect_count = de_queried_count - 
ecc_log->prev_de_queried_count;
+   ecc_log->prev_de_queried_count = de_queried_count;
+   timeout = 0;
+   } else {
+   new_detect_count = 0;
}
 
-   if (timeout && !ecc_log->de_updated) {
-   msleep(1);
-   timeout--;
+   if (new_detect_count) {
+   total_detect_count += new_detect_count;
+   } else {
+   if (!timeout && need_query_count)
+   timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+
+   if (timeout) {
+   if (!--timeout) {
+   query_data_timeout = true;
+   break;
+   }
+   msleep(1);
+   }
}
-   } while (timeout && !ecc_log->de_updated);
+   } while (total_detect_count < need_query_count);
 
-   if (timeout_ms && !timeout) {
-   dev_warn(adev->dev, "Can't find deferred error\n");
-   return;
+   if (query_data_timeout) {
+   dev_warn(adev->dev, "Can't find deferred error! count: %u\n",
+   (need_query_count - total_detect_count));
+   return 0;
}
 
-   if (!ret)
+   if (total_detect_count)
schedule_delayed_work(&ras->page_retirement_dwork, 0);
+
+   return 0;
 }
 
 static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e70c45712ddb..c50f5f2b1f4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/

[PATCH 2/5] drm/amdgpu: refine poison creation interrupt handler

2024-06-17 Thread YiPeng Chai
In order to apply to the case where a large number
of ras poison interrupts:
1. Change to use variable to record poison creation
   requests to avoid fifo full.
2. Prioritize handling poison creation requests
   instead of following the order of requests
   received by the driver.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 41 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  1 +
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ec8e848122f9..13cd6a9234f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2093,10 +2093,8 @@ static void 
amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
 
-   amdgpu_ras_put_poison_req(obj->adev,
-   AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
-
atomic_inc(&con->page_retirement_req_cnt);
+   atomic_inc(&con->poison_creation_count);
 
wake_up(&con->page_retirement_wq);
}
@@ -2873,7 +2871,7 @@ static int amdgpu_ras_poison_creation_handler(struct 
amdgpu_device *adev,
if (query_data_timeout) {
dev_warn(adev->dev, "Can't find deferred error! count: %u\n",
(need_query_count - total_detect_count));
-   return 0;
+   return -ENOENT;
}
 
if (total_detect_count)
@@ -2908,6 +2906,8 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   uint32_t poison_creation_count;
+   int ret;
struct ras_poison_msg poison_msg;
enum amdgpu_ras_block ras_block;
bool poison_creation_is_handled = false;
@@ -2921,7 +2921,20 @@ static int amdgpu_ras_page_retirement_thread(void *param)
if (kthread_should_stop())
break;
 
-   atomic_dec(&con->page_retirement_req_cnt);
+
+   do {
+   poison_creation_count = 
atomic_read(&con->poison_creation_count);
+   ret = amdgpu_ras_poison_creation_handler(adev, 
poison_creation_count);
+   if (ret == -EIO)
+   break;
+
+   if (poison_creation_count) {
+   while (poison_creation_count--) {
+   atomic_dec(&con->poison_creation_count);
+   
atomic_dec(&con->page_retirement_req_cnt);
+   }
+   }
+   } while (atomic_read(&con->poison_creation_count));
 
 #ifdef HAVE_KFIFO_PUT_NON_POINTER
if (!amdgpu_ras_get_poison_req(adev, &poison_msg))
@@ -2932,24 +2945,8 @@ static int amdgpu_ras_page_retirement_thread(void *param)
dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
ras_block_str(ras_block), ras_block);
 
-   if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
-   amdgpu_ras_poison_creation_handler(adev,
-   MAX_UMC_POISON_POLLING_TIME_ASYNC);
-   poison_creation_is_handled = true;
-   } else {
-   /* poison_creation_is_handled:
-*   false: no poison creation interrupt, but it has poison
-*  consumption interrupt.
-*   true: It has poison creation interrupt at the beginning,
-* but it has no poison creation interrupt later.
-*/
-   amdgpu_ras_poison_creation_handler(adev,
-   poison_creation_is_handled ?
-   0 : MAX_UMC_POISON_POLLING_TIME_ASYNC);

amdgpu_ras_poison_consumption_handler(adev, &poison_msg);
-   poison_creation_is_handled = false;
-   }
 #else
 dev_info(adev->dev, "Start processing page retirement. request:%d\n",
 atomic_read(&con->page_retirement_req_cnt));
@@ -3029,6 +3026,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
mutex_init(&con->page_retirement_lock);
init_waitqueue_head(&con->page_retirement_wq);
atomic_set(&con->page_retirement_req_cnt, 0);
+   atomic_set(&con->poison_creation_count, 0);
con->page_retirement_thread =
kthread_run(amdgpu_ras_page_retirement_thread, adev, 
"umc_page_retirement");
if (IS_ERR(con->page_retirement_thread)) {
@@ -3080,6 +

[PATCH 3/5] drm/amdgpu: refine poison consumption interrupt handler

2024-06-17 Thread YiPeng Chai
1. The poison fifo is only used for poison consumption
   requests.
2. Merge reset requests when poison fifo caches multiple
   poison consumption messages

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 58 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 12 ++---
 2 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 13cd6a9234f2..898889600771 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2881,22 +2881,40 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
 }
 
 static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
-   struct ras_poison_msg *poison_msg)
+   uint32_t msg_count, uint32_t *gpu_reset)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-   uint32_t reset = poison_msg->reset;
-   uint16_t pasid = poison_msg->pasid;
+   uint32_t reset_flags = 0, reset = 0;
+   struct ras_poison_msg msg;
+   int ret, i;
 
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 
-   if (poison_msg->pasid_fn)
-   poison_msg->pasid_fn(adev, pasid, poison_msg->data);
+   for (i = 0; i < msg_count; i++) {
+   ret = amdgpu_ras_get_poison_req(adev, &msg);
+   if (!ret)
+   continue;
+
+   if (msg.pasid_fn)
+   msg.pasid_fn(adev, msg.pasid, msg.data);
+
+   reset_flags |= msg.reset;
+   }
+
+   if (reset_flags) {
+   if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+   reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+   else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+   reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+   else
+   reset = reset_flags;
 
-   if (reset) {
flush_delayed_work(&con->page_retirement_dwork);
 
con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
+
+   *gpu_reset = reset;
}
 
return 0;
@@ -2906,11 +2924,9 @@ static int amdgpu_ras_page_retirement_thread(void *param)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-   uint32_t poison_creation_count;
+   uint32_t poison_creation_count, msg_count;
+   uint32_t gpu_reset;
int ret;
-   struct ras_poison_msg poison_msg;
-   enum amdgpu_ras_block ras_block;
-   bool poison_creation_is_handled = false;
 
while (!kthread_should_stop()) {
 
@@ -2921,6 +2937,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
if (kthread_should_stop())
break;
 
+   gpu_reset = 0;
 
do {
poison_creation_count = atomic_read(&con->poison_creation_count);
@@ -2937,16 +2954,19 @@ static int amdgpu_ras_page_retirement_thread(void *param)
} while (atomic_read(&con->poison_creation_count));
 
 #ifdef HAVE_KFIFO_PUT_NON_POINTER
-   if (!amdgpu_ras_get_poison_req(adev, &poison_msg))
-   continue;
-
-   ras_block = poison_msg.block;
-
-   dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
-   ras_block_str(ras_block), ras_block);
-
+   if (ret != -EIO) {
+   msg_count = kfifo_len(&con->poison_fifo);
+   if (msg_count) {
+   ret = amdgpu_ras_poison_consumption_handler(adev,
+   msg_count, &gpu_reset);
+   if ((ret != -EIO) &&
+   (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
+   while (msg_count--)
+   atomic_dec(&con->page_retirement_req_cnt);
+   }
+   }
+   }
 
-   amdgpu_ras_poison_consumption_handler(adev, &poison_msg);
 #else
 dev_info(adev->dev, "Start processing page retirement. request:%d\n",
 atomic_read(&con->page_retirement_req_cnt));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 1dbe69eabb9a..47a46bf49a06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -296,13 +296,15 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
 #ifdef HAVE_KFIFO_PUT_NON_POINTER
-   amdgpu_ras_put_poison_req(adev,
+ 

[PATCH 5/5] drm/amdgpu: add gpu reset check and exception handling

2024-06-17 Thread YiPeng Chai
Add gpu reset check and exception handling for
page retirement.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 +
 1 file changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7f8e6ca07957..635dc86dbfd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1386,10 +1386,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i
memset(&qctx, 0, sizeof(qctx));
qctx.event_id = amdgpu_ras_acquire_event_id(adev, 
amdgpu_ras_intr_triggered() ?
   RAS_EVENT_TYPE_ISR : 
RAS_EVENT_TYPE_INVALID);
+
+   if (!down_read_trylock(&adev->reset_domain->sem))
+   return -EIO;
+
ret = amdgpu_ras_query_error_status_helper(adev, info,
   &err_data,
   &qctx,
   error_query_mode);
+   up_read(&adev->reset_domain->sem);
if (ret)
goto out_fini_err_data;
 
@@ -2884,6 +2889,14 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
return 0;
 }
 
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   struct ras_poison_msg msg;
+
+   while (kfifo_get(&con->poison_fifo, &msg));
+}
+
 static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
uint32_t msg_count, uint32_t *gpu_reset)
 {
@@ -2913,6 +2926,11 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
else
reset = reset_flags;
 
+   /* Check if gpu is in reset state */
+   if (!down_read_trylock(&adev->reset_domain->sem))
+   return -EIO;
+   up_read(&adev->reset_domain->sem);
+
flush_delayed_work(&con->page_retirement_dwork);
 
reinit_completion(&con->ras_recovery_completion);
@@ -2977,6 +2995,31 @@ static int amdgpu_ras_page_retirement_thread(void *param)
}
}
 
+   if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
+   /* gpu is in mode-1 reset state */
+   /* Clear poison creation request */
+   while (atomic_read(&con->poison_creation_count))
+   atomic_dec(&con->poison_creation_count);
+
+   /* Clear poison consumption fifo */
+   amdgpu_ras_clear_poison_fifo(adev);
+
+   while (atomic_read(&con->page_retirement_req_cnt))
+   atomic_dec(&con->page_retirement_req_cnt);
+
+   if (ret == -EIO) {
+   /* Wait for mode-1 reset to complete */
+   down_read(&adev->reset_domain->sem);
+   up_read(&adev->reset_domain->sem);
+   }
+
+   /* Wake up work queue to save bad pages to eeprom */
+   schedule_delayed_work(&con->page_retirement_dwork, 0);
+   } else if (gpu_reset) {
+   /* gpu is in mode-2 reset or other reset state */
+   /* Wake up work queue to save bad pages to eeprom */
+   schedule_delayed_work(&con->page_retirement_dwork, 0);
+   }
 #else
 dev_info(adev->dev, "Start processing page retirement. request:%d\n",
 atomic_read(&con->page_retirement_req_cnt));
-- 
2.34.1



[PATCH 4/5] drm/amdgpu: add completion to wait for ras reset to complete

2024-06-17 Thread YiPeng Chai
Add completion to wait for ras reset to complete.

Signed-off-by: YiPeng Chai 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 898889600771..7f8e6ca07957 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -124,6 +124,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
 
 #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100  //ms
 
+#define MAX_RAS_RECOVERY_COMPLETION_TIME  12 //ms
+
 enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -2518,6 +2520,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
atomic_set(&hive->ras_recovery, 0);
amdgpu_put_xgmi_hive(hive);
}
+
+   complete_all(&ras->ras_recovery_completion);
 }
 
 /* alloc/realloc bps array */
@@ -2911,10 +2915,16 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
 
flush_delayed_work(&con->page_retirement_dwork);
 
+   reinit_completion(&con->ras_recovery_completion);
+
con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
 
*gpu_reset = reset;
+   if (!wait_for_completion_timeout(&con->ras_recovery_completion,
+   msecs_to_jiffies(MAX_RAS_RECOVERY_COMPLETION_TIME)))
+   dev_err(adev->dev, "Waiting for GPU to complete ras reset timeout! reset:0x%x\n",
+   reset);
}
 
return 0;
@@ -3041,6 +3051,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
}
}
 
+   init_completion(&con->ras_recovery_completion);
mutex_init(&con->page_rsv_lock);
INIT_KFIFO(con->poison_fifo);
mutex_init(&con->page_retirement_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 91daf48be03a..b47f03edac87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -537,6 +537,7 @@ struct amdgpu_ras {
DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
struct ras_ecc_log_info  umc_ecc_log;
struct delayed_work page_retirement_dwork;
+   struct completion ras_recovery_completion;
 
/* Fatal error detected flag */
atomic_t fed;
-- 
2.34.1