[Patch v4 14/24] drm/amdkfd: CRIU checkpoint and restore queue control stack

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

Checkpoint contents of queue control stacks on CRIU dump and restore them
during CRIU restore.

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 23 ---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  9 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  | 11 +++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 13 ++--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 14 +++--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 29 +++--
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 22 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 +-
 .../amd/amdkfd/kfd_process_queue_manager.c| 62 +--
 11 files changed, 139 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 146879cd3f2b..582b4a393f95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -312,7 +312,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
p->pasid,
dev->id);
 
-   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL, NULL,
+   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL, NULL, NULL,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 3a5303ebcabf..8eca9ed3ab36 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
 
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-   &properties, &qid, NULL, NULL, NULL);
+   &properties, &qid, NULL, NULL, NULL, NULL);
 
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a92274f9f1f7..248e69c7960b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -332,7 +332,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
struct queue *q,
struct qcm_process_device *qpd,
const struct kfd_criu_queue_priv_data *qd,
-   const void *restore_mqd)
+   const void *restore_mqd, const void 
*restore_ctl_stack)
 {
struct mqd_manager *mqd_mgr;
int retval;
@@ -394,7 +394,8 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
 
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 
&q->gart_mqd_addr,
-&q->properties, restore_mqd);
+&q->properties, restore_mqd, 
restore_ctl_stack,
+qd->ctl_stack_size);
else
mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
@@ -1347,7 +1348,7 @@ static void destroy_kernel_queue_cpsch(struct 
device_queue_manager *dqm,
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd,
const struct kfd_criu_queue_priv_data *qd,
-   const void *restore_mqd)
+   const void *restore_mqd, const void *restore_ctl_stack)
 {
int retval;
struct mqd_manager *mqd_mgr;
@@ -1393,9 +1394,11 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
 * updates the is_evicted flag but is a no-op otherwise.
 */
q->properties.is_evicted = !!qpd->evicted;
+
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 
&q->gart_mqd_addr,
-&q->properties, restore_mqd);
+&q->properties, restore_mqd, 
restore_ctl_stack,
+qd->ctl_stack_size);
else
mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
@@ -1788,7 +1791,8 @@ static int get_wave_state(struct device_queue_manager 
*dqm,
 
 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
const struct queue *q,
-   u32 *mqd_

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Bhardwaj, Rajneesh

Adding Adrian Rebel who is the CRIU maintainer and CRIU list

On 12/22/2021 3:53 PM, Daniel Vetter wrote:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

On 12/20/2021 4:29 AM, Daniel Vetter wrote:

On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:

Am 09.12.21 um 19:28 schrieb Felix Kuehling:

Am 2021-12-09 um 10:30 a.m. schrieb Christian König:

That still won't work.

But I think we could do this change for the amdgpu mmap callback only.

If graphics user mode has problems with it, we could even make this
specific to KFD BOs in the amdgpu_gem_object_mmap callback.

I think it's fine for the whole amdgpu stack, my concern is more about
radeon, nouveau and the ARM stacks which are using this as well.

That blew up so nicely the last time we tried to change it and I know of at
least one case where radeon was/is used with BOs in a child process.

I'm way late and buried again, but I think it'd be good to be consistent



I had committed this change into our amd-staging-drm-next branch last 
week after I got the ACK and RB from Felix and Christian.




here across drivers. Or at least across drm drivers. And we've had the vma
open/close refcounting to make fork work since forever.

I think if we do this we should really only do this for mmap() where this
applies, but reading through the thread here I'm honestly confused why
this is a problem. If CRIU can't handle forked mmaps it needs to be
taught that, not hacked around. Or at least I'm not understanding why
this shouldn't work ...
-Daniel


Hi Daniel

In the v2
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fall%2Fa1a865f5-ad2c-29c8-cbe4-2635d53eceb6%40amd.com%2FT%2F&data=04%7C01%7Crajneesh.bhardwaj%40amd.com%7Ce4634a16c37149da173408d9c58d1338%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637758031981907821%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=h0z4sO19bsJecMqeHGdz%2BHZElKuyzK%2BW%2FMbLWA79I10%3D&reserved=0
I pretty much limited the scope of the change to KFD BOs on mmap. Regarding
CRIU, I think it's not a CRIU problem as CRIU on restore, only tries to
recreate all the child processes and then mmaps all the VMAs it sees (as per
checkpoint snapshot) in the new process address space after the VMA
placements are finalized in the position independent code phase. Since the
inherited VMAs don't have access rights the criu mmap fails.

Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.

Cheers, Daniel



Maybe Adrian can share his views on this.

Hi Adrian - For the context, on CRIU restore we see mmap failures ( in 
PIE restore phase) due to permission issues on the (render node) VMAs 
that were inherited since the application that checkpointed had 
forked.  The VMAs ideally should not be in the child process but the 
smaps file shows these VMAs in the child address space. We didn't want 
to use madvise to avoid this copy and rather change in the kernel mode 
to limit the impact to our user space library thunk. Based on my 
understanding, during PIE restore phase, after the VMA placements are 
finalized, CRIU does a sys_mmap on all the VMA it sees in the VmaEntry 
list and I think its not an issue as per CRIU design but do you think we 
could handle this corner case better inside CRIU?






Regards,

Rajneesh


Regards,
Christian.


Regards,
     Felix



Regards,
Christian.

Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank
you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly
a good idea.


The child cannot access the BOs mapped by the parent anyway with
access restrictions applied

exactly that is not correct. That behavior is actively used by some
userspace stacks as far as I know.

Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:

Thanks Christian. Would it make it less intrusive if I just use the
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
this patch? For our use case, just the ttm_bo_mmap_obj change
should suffice and we don't want to put any more work arounds in
the user space (thunk, in our case).

The child cannot access the BOs mapped by the parent anyway with
access restrictions applied so I wonder why even inherit the vma?

On 12/9/2021 2:54 AM, Christian König wrote:

Am 08.12.21 um 

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Bhardwaj, Rajneesh

Sorry for the typo in my previous email. Please read Adrian Reber*

On 12/22/2021 8:49 PM, Bhardwaj, Rajneesh wrote:


Adding Adrian Rebel who is the CRIU maintainer and CRIU list

On 12/22/2021 3:53 PM, Daniel Vetter wrote:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

On 12/20/2021 4:29 AM, Daniel Vetter wrote:

On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:

Am 09.12.21 um 19:28 schrieb Felix Kuehling:

Am 2021-12-09 um 10:30 a.m. schrieb Christian König:

That still won't work.

But I think we could do this change for the amdgpu mmap callback only.

If graphics user mode has problems with it, we could even make this
specific to KFD BOs in the amdgpu_gem_object_mmap callback.

I think it's fine for the whole amdgpu stack, my concern is more about
radeon, nouveau and the ARM stacks which are using this as well.

That blew up so nicely the last time we tried to change it and I know of at
least one case where radeon was/is used with BOs in a child process.

I'm way late and buried again, but I think it'd be good to be consistent



I had committed this change into our amd-staging-drm-next branch last 
week after I got the ACK and RB from Felix and Christian.




here across drivers. Or at least across drm drivers. And we've had the vma
open/close refcounting to make fork work since forever.

I think if we do this we should really only do this for mmap() where this
applies, but reading through the thread here I'm honestly confused why
this is a problem. If CRIU can't handle forked mmaps it needs to be
taught that, not hacked around. Or at least I'm not understanding why
this shouldn't work ...
-Daniel


Hi Daniel

In the v2
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fall%2Fa1a865f5-ad2c-29c8-cbe4-2635d53eceb6%40amd.com%2FT%2F&data=04%7C01%7Crajneesh.bhardwaj%40amd.com%7Ce4634a16c37149da173408d9c58d1338%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637758031981907821%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=h0z4sO19bsJecMqeHGdz%2BHZElKuyzK%2BW%2FMbLWA79I10%3D&reserved=0
I pretty much limited the scope of the change to KFD BOs on mmap. Regarding
CRIU, I think it's not a CRIU problem as CRIU on restore, only tries to
recreate all the child processes and then mmaps all the VMAs it sees (as per
checkpoint snapshot) in the new process address space after the VMA
placements are finalized in the position independent code phase. Since the
inherited VMAs don't have access rights the criu mmap fails.

Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.

Cheers, Daniel



Maybe Adrian can share his views on this.

Hi Adrian - For the context, on CRIU restore we see mmap failures ( in 
PIE restore phase) due to permission issues on the (render node) VMAs 
that were inherited since the application that checkpointed had 
forked.  The VMAs ideally should not be in the child process but the 
smaps file shows these VMAs in the child address space. We didn't want 
to use madvise to avoid this copy and rather change in the kernel mode 
to limit the impact to our user space library thunk. Based on my 
understanding, during PIE restore phase, after the VMA placements are 
finalized, CRIU does a sys_mmap on all the VMA it sees in the VmaEntry 
list and I think its not an issue as per CRIU design but do you think 
we could handle this corner case better inside CRIU?




Regards,

Rajneesh


Regards,
Christian.


Regards,
     Felix



Regards,
Christian.

Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:

Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. Thank
you!

On 12/9/2021 10:27 AM, Christian König wrote:

Hi Rajneesh,

yes, separating this from the drm_gem_mmap_obj() change is certainly
a good idea.


The child cannot access the BOs mapped by the parent anyway with
access restrictions applied

exactly that is not correct. That behavior is actively used by some
userspace stacks as far as I know.

Regards,
Christian.

Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:

Thanks Christian. Would it make it less intrusive if I just use the
flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
this patch? For our use case, just the ttm_bo_mmap_obj change
should suffice and we don't want to put any more work arounds in
the user space (thunk, in our case).

The child cannot access the BOs mapped by the parent anyway with
access rest

RE: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in SRIOV/ONEVF mode

2021-12-22 Thread Quan, Evan
[AMD Official Use Only]

Reviewed-by: Evan Quan 

From: Nikolic, Marina 
Sent: Wednesday, December 22, 2021 7:25 PM
To: Quan, Evan ; Russell, Kent ; 
amd-gfx@lists.freedesktop.org
Cc: Mitrovic, Milan ; Kitchen, Greg 

Subject: Re: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in 
SRIOV/ONEVF mode


[AMD Official Use Only]

>From a6512c0897aa58ccac9e5483d31193d83fb590b2 Mon Sep 17 00:00:00 2001
From: Marina Nikolic mailto:marina.niko...@amd.com>>
Date: Tue, 14 Dec 2021 20:57:53 +0800
Subject: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in
 SRIOV/ONEVF mode

== Description ==
Setting through sysfs should not be allowed in SRIOV mode.
These calls will not be processed by FW anyway,
but error handling on sysfs level should be improved.

== Changes ==
This patch prohibits performing of all set commands
in SRIOV mode on sysfs level.
It offers better error handling as calls that are
not allowed will not be propagated further.

== Test ==
Writing to any sysfs file in passthrough mode will succeed.
Writing to any sysfs file in ONEVF mode will yield error:
"calling process does not have sufficient permission to execute a command".

Signed-off-by: Marina Nikolic 
mailto:marina.niko...@amd.com>>
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 082539c70fd4..c43818cd02aa 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2133,6 +2133,12 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
}
}

+   /* setting should not be allowed from VF */
+   if (amdgpu_sriov_vf(adev)) {
+   dev_attr->attr.mode &= ~S_IWUGO;
+   dev_attr->store = NULL;
+   }
+
 #undef DEVICE_ATTR_IS

return 0;
--
2.20.1



From: Quan, Evan mailto:evan.q...@amd.com>>
Sent: Wednesday, December 22, 2021 4:19 AM
To: Nikolic, Marina mailto:marina.niko...@amd.com>>; 
Russell, Kent mailto:kent.russ...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Mitrovic, Milan mailto:milan.mitro...@amd.com>>; 
Kitchen, Greg mailto:greg.kitc...@amd.com>>
Subject: RE: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read 
premission in ONEVF mode


[AMD Official Use Only]







From: amd-gfx 
mailto:amd-gfx-boun...@lists.freedesktop.org>>
 On Behalf Of Nikolic, Marina
Sent: Tuesday, December 21, 2021 10:36 PM
To: Russell, Kent mailto:kent.russ...@amd.com>>; 
amd-gfx@lists.freedesktop.org
Cc: Mitrovic, Milan mailto:milan.mitro...@amd.com>>; 
Kitchen, Greg mailto:greg.kitc...@amd.com>>
Subject: Re: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read 
premission in ONEVF mode



[AMD Official Use Only]



[AMD Official Use Only]



>From 06359f3be0c0b889519d6dd954fb11f31e9a15e0 Mon Sep 17 00:00:00 2001

From: Marina Nikolic mailto:marina.niko...@amd.com>>

Date: Tue, 14 Dec 2021 20:57:53 +0800

Subject: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read

 permission in ONEVF mode

[Quan, Evan] With the subject updated(remove the description about 
pp_dpm_sclk), the patch is acked-by: Evan Quan 
mailto:evan.q...@amd.com>>



BR

Evan

== Description ==

Setting through sysfs should not be allowed in SRIOV mode.

These calls will not be processed by FW anyway,

but error handling on sysfs level should be improved.



== Changes ==

This patch prohibits performing of all set commands

in SRIOV mode on sysfs level.

It offers better error handling as calls that are

not allowed will not be propagated further.



== Test ==

Writing to any sysfs file in passthrough mode will succeed.

Writing to any sysfs file in ONEVF mode will yield error:

"calling process does not have sufficient permission to execute a command".



Signed-off-by: Marina Nikolic 
mailto:marina.niko...@amd.com>>

---

 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++

 1 file changed, 6 insertions(+)



diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

index 082539c70fd4..c43818cd02aa 100644

--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c

+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

@@ -2133,6 +2133,12 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_

}

}



+   /* setting should not be allowed from VF */

+   if (amdgpu_sriov_vf(adev)) {

+   dev_attr->attr.mode &= ~S_IWUGO;

+   dev_attr->store = NULL;

+   }

+

 #undef DEVICE_ATTR_IS



return 0;

--

2.20.1





From: Nikolic, Marina mailto:marina.niko...@amd.com>>
Sent: Tuesday, December 21, 2021 3:15 PM
To: Russell, Kent mailto:kent.russ...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailt

Re: Expecting to revert commit 55285e21f045 "fbdev/efifb: Release PCI device ..."

2021-12-22 Thread Alex Deucher
On Tue, Dec 21, 2021 at 1:47 PM Deucher, Alexander
 wrote:
>
> [Public]
>
> > -Original Message-
> > From: Deucher, Alexander
> > Sent: Tuesday, December 21, 2021 12:01 PM
> > To: Linus Torvalds ; Imre Deak
> > ; amd-gfx@lists.freedesktop.org
> > Cc: Daniel Vetter ; Kai-Heng Feng
> > 
> > Subject: RE: Expecting to revert commit 55285e21f045 "fbdev/efifb: Release
> > PCI device ..."
> >
> > [Public]
> >
> > > -Original Message-
> > > From: Linus Torvalds 
> > > Sent: Monday, December 20, 2021 5:05 PM
> > > To: Imre Deak 
> > > Cc: Daniel Vetter ; Deucher, Alexander
> > > ; Kai-Heng Feng
> > > 
> > > Subject: Re: Expecting to revert commit 55285e21f045 "fbdev/efifb:
> > > Release PCI device ..."
> > >
> > > On Mon, Dec 20, 2021 at 1:33 PM Imre Deak 
> > wrote:
> > > >
> > > > amdgpu.runpm=0
> > >
> > > Hmmm.
> > >
> > > This does seem to "work", but not very well.
> > >
> > > With this, what seems to happen is odd: I lock the screen, wait, it
> > > goes "No signal, shutting down", but then doesn't actually shut down
> > > but stays black (with the backlight on). After _another_ five seconds
> > > or so, the monitor goes "No signal, shutting down" _again_, and at that
> > point it actually does it.
> > >
> > > So it solves my immediate problem - in that yes, the backlight finally
> > > does turn off in the end - but it does seem to be still broken.
> > >
> > > I'm very surprised if no AMD drm developers can see this exact same thing.
> > > This is a very simple setup. The only possibly slightly less common
> > > thing is that I have two monitors, but while that is not necessarily
> > > the _most_ common setup in an absolute sense, I'd expect it to be very
> > > common among DRM developers..
> > >
> > > I guess I can just change the revert to just a
> > >
> > > -int amdgpu_runtime_pm = -1;
> > > +int amdgpu_runtime_pm = 0;
> > >
> > > instead. The auto-detect is apparently broken. Maybe it should only
> > > kick in for LVDS screens on actual laptops?
> > >
> > > Note: on my machine, I get that
> > >
> > >amdgpu :49:00.0: amdgpu: Using BACO for runtime pm
> > >
> > > so maybe the other possible runtime pm models (ARPX and BOCO) are ok,
> > > and it's only that BACO case that is broken.
> > >
> > > I have no idea what any of those three things are - I'm just looking
> > > at the uses of that amdgpu_runtime_pm variable.
> > >
> > > amdgpu people: if you don't want that amdgpu_runtime_pm turned off by
> > > default, tell me something else to try.
> >
> > For a little background, runtime PM support was added about 10 year ago
> > originally to support laptops with multiple GPUs (integrated and discrete).
> > It's not specific to the display hardware.  When the GPU is idle, it can be
> > powered down completely.  In the case of these laptops, it's D3 cold
> > (managed by ACPI, we call this BOCO in AMD parlance - Bus Off, Chip Off)
> > which powers off the dGPU completely (i.e., it disappears from the bus).  A
> > few years ago we extended this to support desktop dGPUs as well which
> > support their own version of runtime D3 (called BACO in AMD parlance - Bus
> > Active, Chip Off).  The driver can put the chip into a low power state where
> > everything except the bus interface is powered down (to avoid the device
> > disappearing from the bus).  So this has worked for almost 2 years now on
> > BACO capable parts and for a decade or more on BOCO systems.
> > Unfortunately, changing the default runpm parameter setting would cause a
> > flood of bug reports about runtime power management breaking and
> > suddenly systems are using more power.
> >
> > Imre's commit (55285e21f045) fixes another commit (a6c0fd3d5a8b).
> > Runtime pm was working on amdgpu prior to that commit.  Is it possible
> > there is still some race between when amdgpu takes over from efifb?  Does
> > it work properly when all pm_runtime calls in efifb are removed or if efifb 
> > is
> > not enabled?  Runtime pm for Polaris boards has been enabled by default
> > since 4fdda2e66de0b which predates both of those patches.
>
> Thinking about this more, I wonder if there was some change in some userspace 
> component which was hidden by the changes in 55285e21f045 and a6c0fd3d5a8b.  
> E.g., some desktop component started polling for display changes or GPU 
> temperature or something like that and when a6c0fd3d5a8b was in place the GPU 
> never entered runtime suspend.  Then when 55285e21f045 was applied, it 
> unmasked the new behavior in the userpace component.
>
> What should happen is that when all of the displays blank, assuming the GPU 
> is otherwise idle, the GPU will runtime suspend after  seconds.  When you 
> move the mouse or hit the keyboard, that should trigger the GPU should 
> runtime resume and then the displays will be re-enabled.
>
> In the behavior you are seeing, when the displays come back on after they 
> blank are you seeing the device resume from runtime suspend?  On resume from 
> suspend (runtime 

Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Christian König

Am 22.12.21 um 21:53 schrieb Daniel Vetter:

On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:

[SNIP]
Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.


Unfortunately it's a KFD design problem. AMD used a single device node, 
then mmaped different objects from the same offset to different 
processes and expected it to work the rest of the fs subsystem without 
churn.


So yes, this is indeed because the mmap space is per file descriptor for 
the use case here.


And thanks for pointing this out, this indeed makes the whole change 
extremely questionable.


Regards,
Christian.



Cheers, Daniel





Re: [PATCH] drm/ttm: add workaround for some arm hardware issue

2021-12-22 Thread Christian König

Am 22.12.21 um 15:11 schrieb Alex Deucher:

On Wed, Dec 22, 2021 at 3:18 AM Deng, Emily  wrote:

[AMD Official Use Only]

Currently, only ampere found this issue, but it is hard to detect ampere board, 
especially on arm passthrough environment.

Isn't this already handled in drm_arch_can_wc_memory()?


Could be, that function controls if we are trying to setup USWC in the 
first place.


Christian



Alex


Best wishes
Emily Deng




-Original Message-
From: amd-gfx  On Behalf Of
Christian König
Sent: Wednesday, December 22, 2021 4:11 PM
To: Zhao, Victor ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/ttm: add workaround for some arm hardware issue

Am 22.12.21 um 06:51 schrieb Victor Zhao:

Some Arm based platform has hardware issue which may generate
incorrect addresses when receiving writes from the CPU with a
discontiguous set of byte enables. This affects the writes with write
combine property.

Can you point out which arm platforms are that exactly?


Workaround by change PROT_NORMAL_NC to PROT_DEVICE_nGnRE on arm.
As this is an issue with some specific arm based cpu, adding a ttm
parameter to control.

Something as fundamental as this should not be made controllable by an
module parameter.

Write combining is very important for good performance and so we should
only disable it on boards where we know that this won't work correctly.

Regards,
Christian.


Signed-off-by: Victor Zhao 
---
   drivers/gpu/drm/ttm/ttm_module.c | 8 +++-
   1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_module.c
b/drivers/gpu/drm/ttm/ttm_module.c
index e87f40674a4d..b27473cbbd52 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -41,6 +41,12 @@

   #include "ttm_module.h"

+static int enable_use_wc = 1;
+
+MODULE_PARM_DESC(enable_use_wc,
+"control write combine usage on arm platform due to hardware issue
+with write combine found on some specific arm cpu (1 =
+enable(default), 0 = disable)"); module_param(enable_use_wc, int,
+0644);
+
   /**
* ttm_prot_from_caching - Modify the page protection according to the
* ttm cacing mode
@@ -63,7 +69,7 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching

caching, pgprot_t tmp)

   #endif
   #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
  defined(__powerpc__) || defined(__mips__)
-if (caching == ttm_write_combined)
+if (caching == ttm_write_combined && enable_use_wc != 0)
  tmp = pgprot_writecombine(tmp);
  else
  tmp = pgprot_noncached(tmp);




Re: [PATCH] drm/ttm: add workaround for some arm hardware issue

2021-12-22 Thread Christian König

Am 22.12.21 um 06:51 schrieb Victor Zhao:

Some Arm based platform has hardware issue which may
generate incorrect addresses when receiving writes from the CPU
with a discontiguous set of byte enables. This affects the writes
with write combine property.


Can you point out which arm platforms are that exactly?


Workaround by change PROT_NORMAL_NC to PROT_DEVICE_nGnRE on arm.
As this is an issue with some specific arm based cpu, adding
a ttm parameter to control.


Something as fundamental as this should not be made controllable by an 
module parameter.


Write combining is very important for good performance and so we should 
only disable it on boards where we know that this won't work correctly.


Regards,
Christian.



Signed-off-by: Victor Zhao 
---
  drivers/gpu/drm/ttm/ttm_module.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index e87f40674a4d..b27473cbbd52 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -41,6 +41,12 @@
  
  #include "ttm_module.h"
  
+static int enable_use_wc = 1;

+
+MODULE_PARM_DESC(enable_use_wc,
+   "control write combine usage on arm platform due to hardware issue with 
write combine found on some specific arm cpu (1 = enable(default), 0 = disable)");
+module_param(enable_use_wc, int, 0644);
+
  /**
   * ttm_prot_from_caching - Modify the page protection according to the
   * ttm cacing mode
@@ -63,7 +69,7 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, 
pgprot_t tmp)
  #endif
  #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
defined(__powerpc__) || defined(__mips__)
-   if (caching == ttm_write_combined)
+   if (caching == ttm_write_combined && enable_use_wc != 0)
tmp = pgprot_writecombine(tmp);
else
tmp = pgprot_noncached(tmp);




RE: [PATCH] drm/ttm: add workaround for some arm hardware issue

2021-12-22 Thread Deng, Emily
[AMD Official Use Only]

Currently, only ampere found this issue, but it is hard to detect ampere board, 
especially on arm passthrough environment.

Best wishes
Emily Deng



>-Original Message-
>From: amd-gfx  On Behalf Of
>Christian König
>Sent: Wednesday, December 22, 2021 4:11 PM
>To: Zhao, Victor ; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/ttm: add workaround for some arm hardware issue
>
>Am 22.12.21 um 06:51 schrieb Victor Zhao:
>> Some Arm based platform has hardware issue which may generate
>> incorrect addresses when receiving writes from the CPU with a
>> discontiguous set of byte enables. This affects the writes with write
>> combine property.
>
>Can you point out which arm platforms are that exactly?
>
>> Workaround by change PROT_NORMAL_NC to PROT_DEVICE_nGnRE on arm.
>> As this is an issue with some specific arm based cpu, adding a ttm
>> parameter to control.
>
>Something as fundamental as this should not be made controllable by an
>module parameter.
>
>Write combining is very important for good performance and so we should
>only disable it on boards where we know that this won't work correctly.
>
>Regards,
>Christian.
>
>>
>> Signed-off-by: Victor Zhao 
>> ---
>>   drivers/gpu/drm/ttm/ttm_module.c | 8 +++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_module.c
>> b/drivers/gpu/drm/ttm/ttm_module.c
>> index e87f40674a4d..b27473cbbd52 100644
>> --- a/drivers/gpu/drm/ttm/ttm_module.c
>> +++ b/drivers/gpu/drm/ttm/ttm_module.c
>> @@ -41,6 +41,12 @@
>>
>>   #include "ttm_module.h"
>>
>> +static int enable_use_wc = 1;
>> +
>> +MODULE_PARM_DESC(enable_use_wc,
>> +"control write combine usage on arm platform due to hardware issue
>> +with write combine found on some specific arm cpu (1 =
>> +enable(default), 0 = disable)"); module_param(enable_use_wc, int,
>> +0644);
>> +
>>   /**
>>* ttm_prot_from_caching - Modify the page protection according to the
>>* ttm cacing mode
>> @@ -63,7 +69,7 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching
>caching, pgprot_t tmp)
>>   #endif
>>   #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
>>  defined(__powerpc__) || defined(__mips__)
>> -if (caching == ttm_write_combined)
>> +if (caching == ttm_write_combined && enable_use_wc != 0)
>>  tmp = pgprot_writecombine(tmp);
>>  else
>>  tmp = pgprot_noncached(tmp);



Re: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in SRIOV/ONEVF mode

2021-12-22 Thread Nikolic, Marina
[AMD Official Use Only]

>From a6512c0897aa58ccac9e5483d31193d83fb590b2 Mon Sep 17 00:00:00 2001
From: Marina Nikolic 
Date: Tue, 14 Dec 2021 20:57:53 +0800
Subject: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in
 SRIOV/ONEVF mode

== Description ==
Setting through sysfs should not be allowed in SRIOV mode.
These calls will not be processed by FW anyway,
but error handling on sysfs level should be improved.

== Changes ==
This patch prohibits performing of all set commands
in SRIOV mode on sysfs level.
It offers better error handling as calls that are
not allowed will not be propagated further.

== Test ==
Writing to any sysfs file in passthrough mode will succeed.
Writing to any sysfs file in ONEVF mode will yield error:
"calling process does not have sufficient permission to execute a command".

Signed-off-by: Marina Nikolic 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 082539c70fd4..c43818cd02aa 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2133,6 +2133,12 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
}
}

+   /* setting should not be allowed from VF */
+   if (amdgpu_sriov_vf(adev)) {
+   dev_attr->attr.mode &= ~S_IWUGO;
+   dev_attr->store = NULL;
+   }
+
 #undef DEVICE_ATTR_IS

return 0;
--
2.20.1



From: Quan, Evan 
Sent: Wednesday, December 22, 2021 4:19 AM
To: Nikolic, Marina ; Russell, Kent 
; amd-gfx@lists.freedesktop.org 

Cc: Mitrovic, Milan ; Kitchen, Greg 

Subject: RE: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read 
permission in ONEVF mode


[AMD Official Use Only]







From: amd-gfx  On Behalf Of Nikolic, 
Marina
Sent: Tuesday, December 21, 2021 10:36 PM
To: Russell, Kent ; amd-gfx@lists.freedesktop.org
Cc: Mitrovic, Milan ; Kitchen, Greg 

Subject: Re: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read 
permission in ONEVF mode



[AMD Official Use Only]



[AMD Official Use Only]



>From 06359f3be0c0b889519d6dd954fb11f31e9a15e0 Mon Sep 17 00:00:00 2001

From: Marina Nikolic mailto:marina.niko...@amd.com>>

Date: Tue, 14 Dec 2021 20:57:53 +0800

Subject: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read

 permission in ONEVF mode

[Quan, Evan] With the subject updated(remove the description about 
pp_dpm_sclk), the patch is acked-by: Evan Quan 



BR

Evan

== Description ==

Setting through sysfs should not be allowed in SRIOV mode.

These calls will not be processed by FW anyway,

but error handling on sysfs level should be improved.



== Changes ==

This patch prohibits performing of all set commands

in SRIOV mode on sysfs level.

It offers better error handling as calls that are

not allowed will not be propagated further.



== Test ==

Writing to any sysfs file in passthrough mode will succeed.

Writing to any sysfs file in ONEVF mode will yield error:

"calling process does not have sufficient permission to execute a command".



Signed-off-by: Marina Nikolic 
mailto:marina.niko...@amd.com>>

---

 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++

 1 file changed, 6 insertions(+)



diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

index 082539c70fd4..c43818cd02aa 100644

--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c

+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

@@ -2133,6 +2133,12 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_

}

}



+   /* setting should not be allowed from VF */

+   if (amdgpu_sriov_vf(adev)) {

+   dev_attr->attr.mode &= ~S_IWUGO;

+   dev_attr->store = NULL;

+   }

+

 #undef DEVICE_ATTR_IS



return 0;

--

2.20.1





From: Nikolic, Marina mailto:marina.niko...@amd.com>>
Sent: Tuesday, December 21, 2021 3:15 PM
To: Russell, Kent mailto:kent.russ...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Mitrovic, Milan mailto:milan.mitro...@amd.com>>; 
Kitchen, Greg mailto:greg.kitc...@amd.com>>
Subject: Re: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read 
permission in ONEVF mode



Hi Kent,



Thank you for the review. Yes, I can confirm I am trying to set this for every 
single file for SRIOV mode.

@Kitchen, Greg required this for ROCM-SMI 5.0 
release. In case you need it, he can provide more details.

I'm going to clarify commit message more and send a new patch.



BR,
Marina



From: Russell, Kent mailto:kent.russ...@amd.com>>
Sent: Monday, December 20, 2021 8:01 PM
To: Nikolic, Marina mailto:marina.niko...@amd.com>>; 
amd-gfx@lists.freedesktop.org

Re: [PATCH V5 13/16] drm/amd/pm: relocate the power related headers

2021-12-22 Thread Lazar, Lijo




On 12/22/2021 10:53 AM, Quan, Evan wrote:

[AMD Official Use Only]




-Original Message-
From: Lazar, Lijo 
Sent: Tuesday, December 21, 2021 2:22 PM
To: Quan, Evan ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: Re: [PATCH V5 13/16] drm/amd/pm: relocate the power related
headers



On 12/13/2021 9:22 AM, Evan Quan wrote:

Instead of centralizing all headers in the same folder. Separate them
into different folders and place them among those source files those
who really need them.

Signed-off-by: Evan Quan 
Change-Id: Id74cb4c7006327ca7ecd22daf17321e417c4aa71
--
v1->v2:
- create separate holders for driver_if and ppsmc headers(Lijo)
---
   drivers/gpu/drm/amd/pm/Makefile   | 12 ---
   drivers/gpu/drm/amd/pm/legacy-dpm/Makefile| 32

+++

   .../pm/{powerplay => legacy-dpm}/cik_dpm.h|  0
   .../amd/pm/{powerplay => legacy-dpm}/kv_dpm.c |  0
   .../amd/pm/{powerplay => legacy-dpm}/kv_dpm.h |  0
   .../amd/pm/{powerplay => legacy-dpm}/kv_smc.c |  0
   .../pm/{powerplay => legacy-dpm}/legacy_dpm.c |  0
   .../pm/{powerplay => legacy-dpm}/legacy_dpm.h |  0
   .../amd/pm/{powerplay => legacy-dpm}/ppsmc.h  |  0
   .../pm/{powerplay => legacy-dpm}/r600_dpm.h   |  0
   .../amd/pm/{powerplay => legacy-dpm}/si_dpm.c |  0
   .../amd/pm/{powerplay => legacy-dpm}/si_dpm.h |  0
   .../amd/pm/{powerplay => legacy-dpm}/si_smc.c |  0
   .../{powerplay => legacy-dpm}/sislands_smc.h  |  0
   drivers/gpu/drm/amd/pm/powerplay/Makefile |  6 +---
   .../pm/{ => powerplay}/inc/amd_powerplay.h|  0
   .../drm/amd/pm/{ => powerplay}/inc/cz_ppsmc.h |  0
   .../amd/pm/{ => powerplay}/inc/fiji_ppsmc.h   |  0
   .../pm/{ => powerplay}/inc/hardwaremanager.h  |  0
   .../drm/amd/pm/{ => powerplay}/inc/hwmgr.h|  0
   .../{ => powerplay}/inc/polaris10_pwrvirus.h  |  0
   .../amd/pm/{ => powerplay}/inc/power_state.h  |  0
   .../drm/amd/pm/{ => powerplay}/inc/pp_debug.h |  0
   .../amd/pm/{ => powerplay}/inc/pp_endian.h|  0
   .../amd/pm/{ => powerplay}/inc/pp_thermal.h   |  0
   .../amd/pm/{ => powerplay}/inc/ppinterrupt.h  |  0
   .../drm/amd/pm/{ => powerplay}/inc/rv_ppsmc.h |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu10.h|  0
   .../pm/{ => powerplay}/inc/smu10_driver_if.h  |  0
   .../pm/{ => powerplay}/inc/smu11_driver_if.h  |  0
   .../gpu/drm/amd/pm/{ => powerplay}/inc/smu7.h |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu71.h|  0
   .../pm/{ => powerplay}/inc/smu71_discrete.h   |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu72.h|  0
   .../pm/{ => powerplay}/inc/smu72_discrete.h   |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu73.h|  0
   .../pm/{ => powerplay}/inc/smu73_discrete.h   |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu74.h|  0
   .../pm/{ => powerplay}/inc/smu74_discrete.h   |  0
   .../drm/amd/pm/{ => powerplay}/inc/smu75.h|  0
   .../pm/{ => powerplay}/inc/smu75_discrete.h   |  0
   .../amd/pm/{ => powerplay}/inc/smu7_common.h  |  0
   .../pm/{ => powerplay}/inc/smu7_discrete.h|  0
   .../amd/pm/{ => powerplay}/inc/smu7_fusion.h  |  0
   .../amd/pm/{ => powerplay}/inc/smu7_ppsmc.h   |  0
   .../gpu/drm/amd/pm/{ => powerplay}/inc/smu8.h |  0
   .../amd/pm/{ => powerplay}/inc/smu8_fusion.h  |  0
   .../gpu/drm/amd/pm/{ => powerplay}/inc/smu9.h |  0
   .../pm/{ => powerplay}/inc/smu9_driver_if.h   |  0
   .../{ => powerplay}/inc/smu_ucode_xfer_cz.h   |  0
   .../{ => powerplay}/inc/smu_ucode_xfer_vi.h   |  0
   .../drm/amd/pm/{ => powerplay}/inc/smumgr.h   |  0
   .../amd/pm/{ => powerplay}/inc/tonga_ppsmc.h  |  0
   .../amd/pm/{ => powerplay}/inc/vega10_ppsmc.h |  0
   .../inc/vega12/smu9_driver_if.h   |  0
   .../amd/pm/{ => powerplay}/inc/vega12_ppsmc.h |  0
   .../amd/pm/{ => powerplay}/inc/vega20_ppsmc.h |  0
   .../drm/amd/pm/{ => swsmu}/inc/amdgpu_smu.h   |  0
   .../inc/interface}/smu11_driver_if_arcturus.h |  0
   .../smu11_driver_if_cyan_skillfish.h  |  0
   .../inc/interface}/smu11_driver_if_navi10.h   |  0
   .../smu11_driver_if_sienna_cichlid.h  |  0
   .../inc/interface}/smu11_driver_if_vangogh.h  |  0
   .../inc/interface}/smu12_driver_if.h  |  0
   .../interface}/smu13_driver_if_aldebaran.h|  0
   .../interface}/smu13_driver_if_yellow_carp.h  |  0
   .../inc/interface}/smu_v11_5_pmfw.h   |  0
   .../inc/interface}/smu_v11_8_pmfw.h   |  0
   .../inc/interface}/smu_v13_0_1_pmfw.h |  0
   .../inc/message}/aldebaran_ppsmc.h|  0
   .../inc/message}/arcturus_ppsmc.h |  0
   .../inc/message}/smu_v11_0_7_ppsmc.h  |  0
   .../inc/message}/smu_v11_0_ppsmc.h|  0
   .../inc/message}/smu_v11_5_ppsmc.h|  0
   .../inc/message}/smu_v11_8_ppsmc.h|  0
   .../inc/message}/smu_v12_0_ppsmc.h|  0
   .../inc/message}/smu_v13_0_1_ppsmc.h  |  0
   .../pm/{ => swsmu}/inc/smu_11_0_cdr_table.h   |  0
   .../drm/amd/pm/{ => swsmu}/inc/smu_types.h|  0
   .../drm/amd/pm/{ 

Re: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in SRIOV/ONEVF mode

2021-12-22 Thread Lazar, Lijo




On 12/22/2021 4:55 PM, Nikolic, Marina wrote:

[AMD Official Use Only]


[AMD Official Use Only]


 From a6512c0897aa58ccac9e5483d31193d83fb590b2 Mon Sep 17 00:00:00 2001
From: Marina Nikolic 
Date: Tue, 14 Dec 2021 20:57:53 +0800
Subject: [PATCH] amdgpu/pm: Modify sysfs to have only read permission in
  SRIOV/ONEVF mode


Maybe change subject as "Make sysfs pm attributes as read-only for VFs"

and description like as "Setting values of pm attributes through sysfs..."

Only cosmetic changes, no need to post another one for review again.

Reviewed-by: Lijo Lazar 

Thanks,
Lijo


== Description ==
Setting through sysfs should not be allowed in SRIOV mode.
These calls will not be processed by FW anyway,
but error handling on sysfs level should be improved.

== Changes ==
This patch prohibits performing of all set commands
in SRIOV mode on sysfs level.
It offers better error handling as calls that are
not allowed will not be propagated further.

== Test ==
Writing to any sysfs file in passthrough mode will succeed.
Writing to any sysfs file in ONEVF mode will yield error:
"calling process does not have sufficient permission to execute a command".

Signed-off-by: Marina Nikolic 
---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

index 082539c70fd4..c43818cd02aa 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2133,6 +2133,12 @@ static int default_attr_update(struct 
amdgpu_device *adev, struct amdgpu_device_

                 }
         }

+       /* setting should not be allowed from VF */
+       if (amdgpu_sriov_vf(adev)) {
+               dev_attr->attr.mode &= ~S_IWUGO;
+               dev_attr->store = NULL;
+       }
+
  #undef DEVICE_ATTR_IS

         return 0;
--
2.20.1



*From:* Quan, Evan 
*Sent:* Wednesday, December 22, 2021 4:19 AM
*To:* Nikolic, Marina ; Russell, Kent 
; amd-gfx@lists.freedesktop.org 

*Cc:* Mitrovic, Milan ; Kitchen, Greg 

*Subject:* RE: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only 
read permission in ONEVF mode


[AMD Official Use Only]

*From:* amd-gfx  *On Behalf Of * 
Nikolic, Marina

*Sent:* Tuesday, December 21, 2021 10:36 PM
*To:* Russell, Kent ; amd-gfx@lists.freedesktop.org
*Cc:* Mitrovic, Milan ; Kitchen, Greg 

*Subject:* Re: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only 
read permission in ONEVF mode


[AMD Official Use Only]

[AMD Official Use Only]

 From 06359f3be0c0b889519d6dd954fb11f31e9a15e0 Mon Sep 17 00:00:00 2001

From: Marina Nikolic >


Date: Tue, 14 Dec 2021 20:57:53 +0800

Subject: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only read

  permission in ONEVF mode

*/[Quan, Evan] With the subject updated(remove the description about 
pp_dpm_sclk), the patch is acked-by: Evan Quan /*


BR

Evan*//*

== Description ==

Setting through sysfs should not be allowed in SRIOV mode.

These calls will not be processed by FW anyway,

but error handling on sysfs level should be improved.

== Changes ==

This patch prohibits performing of all set commands

in SRIOV mode on sysfs level.

It offers better error handling as calls that are

not allowed will not be propagated further.

== Test ==

Writing to any sysfs file in passthrough mode will succeed.

Writing to any sysfs file in ONEVF mode will yield error:

"calling process does not have sufficient permission to execute a command".

Signed-off-by: Marina Nikolic >


---

  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6 ++

  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c


index 082539c70fd4..c43818cd02aa 100644

--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c

+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c

@@ -2133,6 +2133,12 @@ static int default_attr_update(struct 
amdgpu_device *adev, struct amdgpu_device_


                 }

         }

+       /* setting should not be allowed from VF */

+       if (amdgpu_sriov_vf(adev)) {

+               dev_attr->attr.mode &= ~S_IWUGO;

+               dev_attr->store = NULL;

+       }

+

  #undef DEVICE_ATTR_IS

         return 0;

--

2.20.1



*From:*Nikolic, Marina >

*Sent:* Tuesday, December 21, 2021 3:15 PM
*To:* Russell, Kent >; amd-gfx@lists.freedesktop.org 
 >
*Cc:* Mitrovic, Milan >; Kitchen, Greg >
*Subject:* Re: [PATCH] amdgpu/pm: Modify sysfs pp_dpm_sclk to have only 
read permission in ONEVF mode


Hi Kent,

Thank you for the review. Yes, I can confirm I am trying to set this for 
every single file for SRIOV mode.

Re: Various problems trying to vga-passthrough a Renoir iGPU to a xen/qubes-os hvm

2021-12-22 Thread Alex Deucher
On Tue, Dec 21, 2021 at 6:09 PM Yann Dirson  wrote:
>
>
>
> - Mail original -
> > De: "Alex Deucher" 
> > À: "Yann Dirson" 
> > Cc: "Christian König" , "amd-gfx list" 
> > 
> > Envoyé: Mardi 21 Décembre 2021 23:31:01
> > Objet: Re: Various problems trying to vga-passthrough a Renoir iGPU to a 
> > xen/qubes-os hvm
> >
> > On Tue, Dec 21, 2021 at 5:12 PM Yann Dirson  wrote:
> > >
> > >
> > > Alex wrote:
> > > >
> > > > On Sun, Dec 19, 2021 at 11:41 AM Yann Dirson 
> > > > wrote:
> > > > >
> > > > > Christian wrote:
> > > > > > Am 19.12.21 um 17:00 schrieb Yann Dirson:
> > > > > > > Alex wrote:
> > > > > > >> Thinking about this more, I think the problem might be
> > > > > > >> related
> > > > > > >> to
> > > > > > >> CPU
> > > > > > >> access to "VRAM".  APUs don't have dedicated VRAM, they
> > > > > > >> use a
> > > > > > >> reserved
> > > > > > >> carve out region at the top of system memory.  For CPU
> > > > > > >> access
> > > > > > >> to
> > > > > > >> this
> > > > > > >> memory, we kmap the physical address of the carve out
> > > > > > >> region
> > > > > > >> of
> > > > > > >> system
> > > > > > >> memory.  You'll need to make sure that region is
> > > > > > >> accessible to
> > > > > > >> the
> > > > > > >> guest.
> > > > > > > So basically, the non-virt flow is is: (video?) BIOS
> > > > > > > reserves
> > > > > > > memory, marks it
> > > > > > > as reserved in e820, stores the physaddr somewhere, which
> > > > > > > the
> > > > > > > GPU
> > > > > > > driver gets.
> > > > > > > Since I suppose this includes the framebuffer, this
> > > > > > > probably
> > > > > > > has to
> > > > > > > occur around
> > > > > > > the moment the driver calls
> > > > > > > drm_aperture_remove_conflicting_pci_framebuffers()
> > > > > > > (which happens before this hw init step), right ?
> > > > > >
> > > > > > Well, that partially correct. The efifb is using the PCIe
> > > > > > resources
> > > > > > to
> > > > > > access the framebuffer and as far as I know we use that one
> > > > > > to
> > > > > > kick
> > > > > > it out.
> > > > > >
> > > > > > The stolen memory we get over e820/registers is separate to
> > > > > > that.
> > >
> > > How is the stolen memory communicated to the driver ?  That host
> > > physical
> > > memory probably has to be mapped at the same guest physical address
> > > for
> > > the magic to work, right ?
> >
> > Correct.  The driver reads the physical location of that memory from
> > hardware registers.  Removing this chunk of code from gmc_v9_0.c will
> > force the driver to use the BAR, but I'm not sure if there are any
> > other places in the driver that make assumptions about using the
> > physical host address or not on APUs off hand.
> >
> > if ((adev->flags & AMD_IS_APU) ||
> > (adev->gmc.xgmi.supported &&
> >  adev->gmc.xgmi.connected_to_cpu)) {
> > adev->gmc.aper_base =
> > adev->gfxhub.funcs->get_mc_fb_offset(adev) +
> > adev->gmc.xgmi.physical_node_id *
> > adev->gmc.xgmi.node_segment_size;
> > adev->gmc.aper_size = adev->gmc.real_vram_size;
> > }
> >
> >
> >
> > >
> > > > > >
> > > > > > > ... which brings me to a point that's been puzzling me for
> > > > > > > some
> > > > > > > time, which is
> > > > > > > that as the hw init fails, the efifb driver is still using
> > > > > > > the
> > > > > > > framebuffer.
> > > > > >
> > > > > > No, it isn't. You are probably just still seeing the same
> > > > > > screen.
> > > > > >
> > > > > > The issue is most likely that while efi was kicked out nobody
> > > > > > re-programmed the display hardware to show something
> > > > > > different.
> > > > > >
> > > > > > > Am I right in suspecting that efifb should get stripped of
> > > > > > > its
> > > > > > > ownership of the
> > > > > > > fb aperture first, and that if I don't get a black screen
> > > > > > > on
> > > > > > > hw_init failure
> > > > > > > that issue should be the first focus point ?
> > > > > >
> > > > > > You assumption with the black screen is incorrect. Since the
> > > > > > hardware
> > > > > > works independent even if you kick out efi you still have the
> > > > > > same
> > > > > > screen content, you just can't update it anymore.
> > > > >
> > > > > It's not only that the screen keeps its contents, it's that the
> > > > > dom0
> > > > > happily continues updating it.
> > > >
> > > > If the hypevisor is using efifb, then yes that could be a problem
> > > > as
> > > > the hypervisor could be writing to the efifb resources which ends
> > > > up
> > > > writing to the same physical memory.  That applies to any GPU on
> > > > a
> > > > UEFI system.  You'll need to make sure efifb is not in use in the
> > > > hypervisor.
> > >
> > > That remark evokes several things to me.  First one is that every
> > > time
> > > I've tried booting with efifb disabled in dom0, there was no
> > > visible
> > > improvements in the guest driver -

Re: [PATCH] drm/ttm: add workaround for some arm hardware issue

2021-12-22 Thread Alex Deucher
On Wed, Dec 22, 2021 at 3:18 AM Deng, Emily  wrote:
>
> [AMD Official Use Only]
>
> Currently, only ampere found this issue, but it is hard to detect ampere 
> board, especially on arm passthrough environment.

Isn't this already handled in drm_arch_can_wc_memory()?

Alex

>
> Best wishes
> Emily Deng
>
>
>
> >-Original Message-
> >From: amd-gfx  On Behalf Of
> >Christian König
> >Sent: Wednesday, December 22, 2021 4:11 PM
> >To: Zhao, Victor ; amd-gfx@lists.freedesktop.org
> >Subject: Re: [PATCH] drm/ttm: add workaround for some arm hardware issue
> >
> >Am 22.12.21 um 06:51 schrieb Victor Zhao:
> >> Some Arm based platform has hardware issue which may generate
> >> incorrect addresses when receiving writes from the CPU with a
> >> discontiguous set of byte enables. This affects the writes with write
> >> combine property.
> >
> >Can you point out which arm platforms are that exactly?
> >
> >> Workaround by change PROT_NORMAL_NC to PROT_DEVICE_nGnRE on arm.
> >> As this is an issue with some specific arm based cpu, adding a ttm
> >> parameter to control.
> >
> >Something as fundamental as this should not be made controllable by an
> >module parameter.
> >
> >Write combining is very important for good performance and so we should
> >only disable it on boards where we know that this won't work correctly.
> >
> >Regards,
> >Christian.
> >
> >>
> >> Signed-off-by: Victor Zhao 
> >> ---
> >>   drivers/gpu/drm/ttm/ttm_module.c | 8 +++-
> >>   1 file changed, 7 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_module.c
> >> b/drivers/gpu/drm/ttm/ttm_module.c
> >> index e87f40674a4d..b27473cbbd52 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_module.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_module.c
> >> @@ -41,6 +41,12 @@
> >>
> >>   #include "ttm_module.h"
> >>
> >> +static int enable_use_wc = 1;
> >> +
> >> +MODULE_PARM_DESC(enable_use_wc,
> >> +"control write combine usage on arm platform due to hardware issue
> >> +with write combine found on some specific arm cpu (1 =
> >> +enable(default), 0 = disable)"); module_param(enable_use_wc, int,
> >> +0644);
> >> +
> >>   /**
> >>* ttm_prot_from_caching - Modify the page protection according to the
> >>* ttm cacing mode
> >> @@ -63,7 +69,7 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching
> >caching, pgprot_t tmp)
> >>   #endif
> >>   #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
> >>  defined(__powerpc__) || defined(__mips__)
> >> -if (caching == ttm_write_combined)
> >> +if (caching == ttm_write_combined && enable_use_wc != 0)
> >>  tmp = pgprot_writecombine(tmp);
> >>  else
> >>  tmp = pgprot_noncached(tmp);
>


[PATCH V2] drm/i915: Replace kmap() with kmap_local_page()

2021-12-22 Thread ira . weiny
From: Ira Weiny 

kmap() is being deprecated and these usages are all local to the thread
so there is no reason kmap_local_page() can't be used.

Replace kmap() calls with kmap_local_page().

Signed-off-by: Ira Weiny 

---
NOTE: I'm sending as a follow on to the V1 patch.  Please let me know if you
prefer the entire series instead.

Changes for V2:
From Christoph Helwig
Prefer the use of memcpy_*_page() where appropriate.
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 6 ++
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 8 
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c   | 4 ++--
 drivers/gpu/drm/i915/gt/shmem_utils.c  | 7 ++-
 drivers/gpu/drm/i915/i915_gem.c| 8 
 drivers/gpu/drm/i915/i915_gpu_error.c  | 4 ++--
 6 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index d77da59fae04..842e0895 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -589,7 +589,7 @@ i915_gem_object_create_shmem_from_data(struct 
drm_i915_private *dev_priv,
do {
unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
struct page *page;
-   void *pgdata, *vaddr;
+   void *pgdata;
 
err = pagecache_write_begin(file, file->f_mapping,
offset, len, 0,
@@ -597,9 +597,7 @@ i915_gem_object_create_shmem_from_data(struct 
drm_i915_private *dev_priv,
if (err < 0)
goto fail;
 
-   vaddr = kmap(page);
-   memcpy(vaddr, data, len);
-   kunmap(page);
+   memcpy_to_page(page, 0, data, len);
 
err = pagecache_write_end(file, file->f_mapping,
  offset, len, len,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 6d30cdfa80f3..e59e1725e29d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -144,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object 
*obj,
intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
 
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
-   cpu = kmap(p) + offset_in_page(offset);
+   cpu = kmap_local_page(p) + offset_in_page(offset);
drm_clflush_virt_range(cpu, sizeof(*cpu));
if (*cpu != (u32)page) {
pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, 
row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to 
page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
@@ -162,7 +162,7 @@ static int check_partial_mapping(struct drm_i915_gem_object 
*obj,
}
*cpu = 0;
drm_clflush_virt_range(cpu, sizeof(*cpu));
-   kunmap(p);
+   kunmap_local(cpu);
 
 out:
__i915_vma_put(vma);
@@ -237,7 +237,7 @@ static int check_partial_mappings(struct 
drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
 
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
-   cpu = kmap(p) + offset_in_page(offset);
+   cpu = kmap_local_page(p) + offset_in_page(offset);
drm_clflush_virt_range(cpu, sizeof(*cpu));
if (*cpu != (u32)page) {
pr_err("Partial view for %lu [%u] (offset=%llu, size=%u 
[%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected 
write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
@@ -255,7 +255,7 @@ static int check_partial_mappings(struct 
drm_i915_gem_object *obj,
}
*cpu = 0;
drm_clflush_virt_range(cpu, sizeof(*cpu));
-   kunmap(p);
+   kunmap_local(cpu);
if (err)
return err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index f8948de72036..743a414f86f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -743,7 +743,7 @@ static void swizzle_page(struct page *page)
char *vaddr;
int i;
 
-   vaddr = kmap(page);
+   vaddr = kmap_local_page(page);
 
for (i = 0; i < PAGE_SIZE; i += 128) {
memcpy(temp, &vaddr[i], 64);
@@ -751,7 +751,7 @@ static void swizzle_page(struct page *page)
memcpy(&vaddr[i + 64], temp, 64);
}
 
-   kunmap(page);
+   kunmap_local(vaddr);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c 
b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 0683b27a3890..d47f262d2f07 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/

[PATCH] drm/amdgpu: avoid NULL pointer dereference

2021-12-22 Thread Guchun Chen
amdgpu_umc_poison_handler for UMC RAS consumption gets
called in KFD queue reset, but it needs to return early when
RAS context is NULL. This can guarantee lower access to
RAS context like in amdgpu_umc_do_page_retirement. Also
improve coding style in amdgpu_umc_poison_handler.

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 46264a4002f7..b455fc7d1546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -112,16 +112,20 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
void *ras_error_status,
bool reset)
 {
-   int ret;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
-   struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+   struct ras_manager *obj;
+   int ret;
+
+   if (!con)
+   return 0;
 
-   ret =
-   amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, 
reset);
+   ret = amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, 
reset);
 
+   obj = amdgpu_ras_find_obj(adev, &head);
if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data->ue_count;
obj->err_data.ce_count += err_data->ce_count;
-- 
2.17.1



Re: [PATCH] drm/amdgpu: fix runpm documentation

2021-12-22 Thread Alex Deucher
On Tue, Dec 21, 2021 at 10:13 PM Quan, Evan  wrote:
>
> [AMD Official Use Only]
>
>
>
> > -Original Message-
> > From: amd-gfx  On Behalf Of Alex
> > Deucher
> > Sent: Tuesday, December 21, 2021 10:59 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander 
> > Subject: [PATCH] drm/amdgpu: fix runpm documentation
> >
> > It's not only supported by HG/PX laptops.  It's supported
> > by all dGPUs which supports BOCO/BACO functionality (runtime
> > D3).
> >
> > BOCO - Bus Off, Chip Off.  The entire chip is powered off.
> >This is controlled by ACPI.
> > BACO - Bus Active, Chip Off.  The chip still shows up
> >on the PCI bus, but the device itself is powered
> >down.
> >
> > Signed-off-by: Alex Deucher 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++--
> >  1 file changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index a78bbea9629d..f001924ed92e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -331,9 +331,10 @@ module_param_named(aspm, amdgpu_aspm, int,
> > 0444);
> >  /**
> >   * DOC: runpm (int)
> >   * Override for runtime power management control for dGPUs in PX/HG
> > laptops. The amdgpu driver can dynamically power down
> [Quan, Evan] This("dGPUs in PX/HG latops") needs also be updated. Maybe 
> missing unintentionally ?
> With that fixed, the patch is reviewed-by: Evan Quan 

Yes, good catch.  Fixed up.

Thanks!

Alex

>
> BR
> Evan
> > - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto 
> > enable).
> > Setting the value to 0 disables this functionality.
> > + * the dGPUs when they are idle if supported. The default is -1 (auto
> > enable).
> > + * Setting the value to 0 disables this functionality.
> >   */
> > -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with
> > BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
> > +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with
> > BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)");
> >  module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
> >
> >  /**
> > --
> > 2.33.1


Re: [PATCH] drm/amd/display: Fix the uninitialized variable in enable_stream_features()

2021-12-22 Thread Alex Deucher
Applied.  Thanks!

Alex

On Fri, Dec 17, 2021 at 11:22 PM Yizhuo Zhai  wrote:
>
> In function enable_stream_features(), the variable "old_downspread.raw"
> could be uninitialized if core_link_read_dpcd() fails, however, it is
> used in the later if statement, and further, core_link_write_dpcd()
> may write random value, which is potentially unsafe.
>
> Fixes: 6016cd9dba0f ("drm/amd/display: add helper for enabling mst stream 
> features")
> Cc: sta...@vger.kernel.org
> Signed-off-by: Yizhuo Zhai 
> ---
>  drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> index c8457babfdea..fd5a0e7eb029 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
> @@ -1844,6 +1844,8 @@ static void enable_stream_features(struct pipe_ctx 
> *pipe_ctx)
> union down_spread_ctrl old_downspread;
> union down_spread_ctrl new_downspread;
>
> +   memset(&old_downspread, 0, sizeof(old_downspread));
> +
> core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
> &old_downspread.raw, sizeof(old_downspread));
>
> --
> 2.25.1
>


[PATCH] drm/amdgpu: disable hotplug events on runtime pm resume

2021-12-22 Thread Alex Deucher
When runtime pm kicks in and the device goes into runtime
suspend, we often see random calls (small rendering calls,
etc.) into the driver which cause the device to runtime
resume.  On resume we issue a hotplug event in case any
displays were changed during suspend, however, these events
cause the compositor to probe the displays and power then
back up leading to the user seeing the displays go off
followed by them coming back on again.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1840
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=215203
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 36 +-
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 00b29ff414de..0970105ed03c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4070,25 +4070,33 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
 
amdgpu_ras_resume(adev);
 
-   /*
-* Most of the connector probing functions try to acquire runtime pm
-* refs to ensure that the GPU is powered on when connector polling is
-* performed. Since we're calling this from a runtime PM callback,
-* trying to acquire rpm refs will cause us to deadlock.
-*
-* Since we're guaranteed to be holding the rpm lock, it's safe to
-* temporarily disable the rpm helpers so this doesn't deadlock us.
+   /* Skip hotplug events in runtime pm.  Otherwise, we end up having the 
GPU
+* woken up periodically for random things which causes a hotplug event
+* that the desktop compostior to reacts to and re-enables the
+* displays.  This leads to displays turning back on soon after they go 
to
+* sleep in some cases if runtime pm kicks in.
 */
+   if (!adev->in_runpm) {
+   /*
+* Most of the connector probing functions try to acquire 
runtime pm
+* refs to ensure that the GPU is powered on when connector 
polling is
+* performed. Since we're calling this from a runtime PM 
callback,
+* trying to acquire rpm refs will cause us to deadlock.
+*
+* Since we're guaranteed to be holding the rpm lock, it's safe 
to
+* temporarily disable the rpm helpers so this doesn't deadlock 
us.
+*/
 #ifdef CONFIG_PM
-   dev->dev->power.disable_depth++;
+   dev->dev->power.disable_depth++;
 #endif
-   if (!amdgpu_device_has_dc_support(adev))
-   drm_helper_hpd_irq_event(dev);
-   else
-   drm_kms_helper_hotplug_event(dev);
+   if (!amdgpu_device_has_dc_support(adev))
+   drm_helper_hpd_irq_event(dev);
+   else
+   drm_kms_helper_hotplug_event(dev);
 #ifdef CONFIG_PM
-   dev->dev->power.disable_depth--;
+   dev->dev->power.disable_depth--;
 #endif
+   }
adev->in_suspend = false;
 
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
-- 
2.33.1



Re: [PATCH] drm/ttm: Don't inherit GEM object VMAs in child process

2021-12-22 Thread Daniel Vetter
On Mon, Dec 20, 2021 at 01:12:51PM -0500, Bhardwaj, Rajneesh wrote:
> 
> On 12/20/2021 4:29 AM, Daniel Vetter wrote:
> > On Fri, Dec 10, 2021 at 07:58:50AM +0100, Christian König wrote:
> > > Am 09.12.21 um 19:28 schrieb Felix Kuehling:
> > > > Am 2021-12-09 um 10:30 a.m. schrieb Christian König:
> > > > > That still won't work.
> > > > > 
> > > > > But I think we could do this change for the amdgpu mmap callback only.
> > > > If graphics user mode has problems with it, we could even make this
> > > > specific to KFD BOs in the amdgpu_gem_object_mmap callback.
> > > I think it's fine for the whole amdgpu stack, my concern is more about
> > > radeon, nouveau and the ARM stacks which are using this as well.
> > > 
> > > That blew up so nicely the last time we tried to change it and I know of 
> > > at
> > > least one case where radeon was/is used with BOs in a child process.
> > I'm way late and burried again, but I think it'd be good to be consistent
> > here across drivers. Or at least across drm drivers. And we've had the vma
> > open/close refcounting to make fork work since forever.
> > 
> > I think if we do this we should really only do this for mmap() where this
> > applies, but reading through the thread here I'm honestly confused why
> > this is a problem. If CRIU can't handle forked mmaps it needs to be
> > thought that, not hacked around. Or at least I'm not understanding why
> > this shouldn't work ...
> > -Daniel
> > 
> 
> Hi Daniel
> 
> In the v2
> https://lore.kernel.org/all/a1a865f5-ad2c-29c8-cbe4-2635d53ec...@amd.com/T/
> I pretty much limited the scope of the change to KFD BOs on mmap. Regarding
CRIU, I think it's not a CRIU problem, as CRIU on restore only tries to
> recreate all the child processes and then mmaps all the VMAs it sees (as per
> checkpoint snapshot) in the new process address space after the VMA
> placements are finalized in the position independent code phase. Since the
> inherited VMAs don't have access rights the criu mmap fails.

Still sounds funky. I think minimally we should have an ack from CRIU
developers that this is officially the right way to solve this problem. I
really don't want to have random one-off hacks that don't work across the
board, for a problem where we (drm subsystem) really shouldn't be the only
one with this problem. Where "this problem" means that the mmap space is
per file description, and not per underlying inode or real device or
whatever. That part sounds like a CRIU problem, and I expect CRIU folks
want a consistent solution across the board for this. Hence please grab an
ack from them.

Cheers, Daniel

> 
> Regards,
> 
> Rajneesh
> 
> > > Regards,
> > > Christian.
> > > 
> > > > Regards,
> > > >     Felix
> > > > 
> > > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > Am 09.12.21 um 16:29 schrieb Bhardwaj, Rajneesh:
> > > > > > Sounds good. I will send a v2 with only ttm_bo_mmap_obj change. 
> > > > > > Thank
> > > > > > you!
> > > > > > 
> > > > > > On 12/9/2021 10:27 AM, Christian König wrote:
> > > > > > > Hi Rajneesh,
> > > > > > > 
> > > > > > > yes, separating this from the drm_gem_mmap_obj() change is 
> > > > > > > certainly
> > > > > > > a good idea.
> > > > > > > 
> > > > > > > > The child cannot access the BOs mapped by the parent anyway with
> > > > > > > > access restrictions applied
> > > > > > > exactly that is not correct. That behavior is actively used by 
> > > > > > > some
> > > > > > > userspace stacks as far as I know.
> > > > > > > 
> > > > > > > Regards,
> > > > > > > Christian.
> > > > > > > 
> > > > > > > Am 09.12.21 um 16:23 schrieb Bhardwaj, Rajneesh:
> > > > > > > > Thanks Christian. Would it make it less intrusive if I just use 
> > > > > > > > the
> > > > > > > > flag for ttm bo mmap and remove the drm_gem_mmap_obj change from
> > > > > > > > this patch? For our use case, just the ttm_bo_mmap_obj change
> > > > > > > > should suffice and we don't want to put any more work arounds in
> > > > > > > > the user space (thunk, in our case).
> > > > > > > > 
> > > > > > > > The child cannot access the BOs mapped by the parent anyway with
> > > > > > > > access restrictions applied so I wonder why even inherit the 
> > > > > > > > vma?
> > > > > > > > 
> > > > > > > > On 12/9/2021 2:54 AM, Christian König wrote:
> > > > > > > > > Am 08.12.21 um 21:53 schrieb Rajneesh Bhardwaj:
> > > > > > > > > > When an application having open file access to a node 
> > > > > > > > > > forks, its
> > > > > > > > > > shared
> > > > > > > > > > mappings also get reflected in the address space of child 
> > > > > > > > > > process
> > > > > > > > > > even
> > > > > > > > > > though it cannot access them with the object permissions 
> > > > > > > > > > applied.
> > > > > > > > > > With the
> > > > > > > > > > existing permission checks on the gem objects, it might be
> > > > > > > > > > reasonable to
> > > > > > > > > > also create the VMAs with VM_DONTCOPY flag so a user space
> > > > > > > > > > application
> > > > > > >

[PATCH 1/6] drm/amd/display: Fix underflow for fused display pipes case

2021-12-22 Thread Rodrigo Siqueira
From: Yi-Ling Chen 

[Why]
Depending on res_pool->res_cap->num_timing_generator to query timing
generator information would cause an underflow in the fused display
pipes case, because res_pool->res_cap->num_timing_generator records
the default timing generator resources built into the driver, not
those of the current chip.

[How]
Some ASICs are fused to have fewer display pipes than the default
setting. In the dcnxx_resource_construct function, the driver obtains
the real timing generator count and stores it into
res_pool->timing_generator_count.

Reviewed-by: Anthony Koo 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Yi-Ling Chen 
---
 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index f19015413ce3..530a72e3eefe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1365,7 +1365,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state 
*context)
uint32_t opp_id_src1 = OPP_ID_INVALID;
 
// Step 1: To find out which OPTC is running & OPTC DSC is ON
-   for (i = 0; i < dc->res_pool->res_cap->num_timing_generator; 
i++) {
+   // We can't use res_pool->res_cap->num_timing_generator to check
+   // Because it records display pipes default setting built in 
driver,
+   // not display pipes of the current chip.
+   // Some ASICs would be fused display pipes less than the 
default setting.
+   // In dcnxx_resource_construct function, driver would obatin 
real information.
+   for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
uint32_t optc_dsc_state = 0;
struct timing_generator *tg = 
dc->res_pool->timing_generators[i];
 
-- 
2.25.1



[PATCH 0/6] DC Patches December 22, 2021

2021-12-22 Thread Rodrigo Siqueira
Hi,

This is the last DC upstream of this year. As a result, it is a very
tiny one with a few bug fixes.

Just for curiosity, I decided to calculate how many patches we upstream
via this weekly process in 2021, and it was approximately 740 patches
where Daniel Wheeler tested each patchset. Thanks to everybody for
helping with this process, and special thanks to Daniel, who validated
each patchset :)

Cc: Daniel Wheeler 

Best Regards
Siqueira

Charlene Liu (1):
  drm/amd/display: Add check for forced_clocks debug option

Mikita Lipski (1):
  drm/amd/display: introduce mpo detection flags

Nicholas Kazlauskas (2):
  drm/amd/display: Don't reinitialize DMCUB on s0ix resume
  drm/amd/display: Add version check before using DP alt query interface

Wenjing Liu (1):
  drm/amd/display: unhard code link to phy idx mapping in dc link and
clean up

Yi-Ling Chen (1):
  drm/amd/display: Fix underflow for fused display pipes case

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  40 -
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   2 +
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 145 +++---
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  33 
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |   7 +-
 .../display/dc/dcn31/dcn31_dio_link_encoder.c | 114 +++---
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |   2 +-
 drivers/gpu/drm/amd/display/dc/dm_cp_psp.h|   4 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h |   1 +
 .../drm/amd/display/modules/inc/mod_hdcp.h|   2 +-
 10 files changed, 234 insertions(+), 116 deletions(-)

-- 
2.25.1



[PATCH 4/6] drm/amd/display: Add check for forced_clocks debug option

2021-12-22 Thread Rodrigo Siqueira
From: Charlene Liu 

[why]
driver missed the check.

[how]
add the check.
add min display clock = 100mhz check based on dccg doc.

[note]
add SetPhyclkVoltageByFreq as confirmed with smu, but not enabled in
this change.

Reviewed-by: Dmytro Laktyushkin 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Charlene Liu 
---
 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 7c64317a56f7..40778c05f9b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1978,7 +1978,7 @@ static void dcn31_calculate_wm_and_dlg_fp(
pipes[pipe_idx].clks_cfg.dispclk_mhz = 
get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
pipes[pipe_idx].clks_cfg.dppclk_mhz = 
get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
-   if (dc->config.forced_clocks) {
+   if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
pipes[pipe_idx].clks_cfg.dispclk_mhz = 
context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
pipes[pipe_idx].clks_cfg.dppclk_mhz = 
context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
}
-- 
2.25.1



[PATCH 3/6] drm/amd/display: Don't reinitialize DMCUB on s0ix resume

2021-12-22 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why]
PSP will suspend and resume DMCUB. Driver should just wait for DMCUB to
finish the auto load before continuing instead of placing it into
reset, wiping its firmware state and reinitializing.

If we don't let DMCUB fully finish initializing for S0ix then some state
will be lost and screen corruption can occur due to incorrect address
translation.

[How]
Use dmub_srv callbacks to determine if DMCUB is running and wait for
auto-load to complete before continuing.

In S0ix DMCUB will be running and DAL fw so initialize will skip.

In S3 DMCUB will not be running and we will do a full hardware init.

In S3 DMCUB will be running but will not be DAL fw so we will also do
a full hardware init.

Reviewed-by: Mikita Lipski 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Nicholas Kazlauskas 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 +--
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index bfeb84d2d1a0..2432b0c0be69 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1161,6 +1161,32 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
return 0;
 }
 
+static void dm_dmub_hw_resume(struct amdgpu_device *adev)
+{
+   struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
+   enum dmub_status status;
+   bool init;
+
+   if (!dmub_srv) {
+   /* DMUB isn't supported on the ASIC. */
+   return;
+   }
+
+   status = dmub_srv_is_hw_init(dmub_srv, &init);
+   if (status != DMUB_STATUS_OK)
+   DRM_WARN("DMUB hardware init check failed: %d\n", status);
+
+   if (status == DMUB_STATUS_OK && init) {
+   /* Wait for firmware load to finish. */
+   status = dmub_srv_wait_for_auto_load(dmub_srv, 10);
+   if (status != DMUB_STATUS_OK)
+   DRM_WARN("Wait for DMUB auto-load failed: %d\n", 
status);
+   } else {
+   /* Perform the full hardware initialization. */
+   dm_dmub_hw_init(adev);
+   }
+}
+
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 static void mmhub_read_system_context(struct amdgpu_device *adev, struct 
dc_phy_addr_space_config *pa_config)
 {
@@ -2637,9 +2663,7 @@ static int dm_resume(void *handle)
amdgpu_dm_outbox_init(adev);
 
/* Before powering on DC we need to re-initialize DMUB. */
-   r = dm_dmub_hw_init(adev);
-   if (r)
-   DRM_ERROR("DMUB interface failed to initialize: status=%d\n", 
r);
+   dm_dmub_hw_resume(adev);
 
/* power on hardware */
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
-- 
2.25.1



[PATCH 2/6] drm/amd/display: unhard code link to phy idx mapping in dc link and clean up

2021-12-22 Thread Rodrigo Siqueira
From: Wenjing Liu 

[why]
1. Current code hard codes link to PHY mapping in dc link level per asic
per revision.
This is not scalable. In the long term the mapping will be obtained
from DMUB and stored in the dc resource.

2. Depending on the DCN revision and endpoint type, the definitions of
dio_output_idx, dio_output_type and phy_idx are not consistent. We need
to unify the meaning of these hardware indices across different system
configurations.

[how]
1. Temporarily move the hardcoded mapping to the dc_resource level,
which should have full awareness of the asic-specific configuration,
and add a TODO comment to move the mapping to DMUB.

2. populate dio_output_idx/phy_idx for all configuration, define
usb4_enabled bit instead of dio_output_type as an external enum.

Reviewed-by: Eric Yang 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Wenjing Liu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 145 +++---
 .../gpu/drm/amd/display/dc/core/dc_resource.c |  33 
 drivers/gpu/drm/amd/display/dc/dm_cp_psp.h|   4 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h |   1 +
 .../drm/amd/display/modules/inc/mod_hdcp.h|   2 +-
 5 files changed, 95 insertions(+), 90 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index dc1380b6c5e0..b5e570d33ca9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3971,102 +3971,73 @@ static enum dc_status deallocate_mst_payload(struct 
pipe_ctx *pipe_ctx)
 static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
 {
struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
struct link_encoder *link_enc = NULL;
-#endif
+   struct cp_psp_stream_config config = {0};
+   enum dp_panel_mode panel_mode =
+   dp_get_panel_mode(pipe_ctx->stream->link);
 
-   if (cp_psp && cp_psp->funcs.update_stream_config) {
-   struct cp_psp_stream_config config = {0};
-   enum dp_panel_mode panel_mode =
-   dp_get_panel_mode(pipe_ctx->stream->link);
+   if (cp_psp == NULL || cp_psp->funcs.update_stream_config == NULL)
+   return;
 
-   config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
-   /*stream_enc_inst*/
-   config.dig_fe = (uint8_t) 
pipe_ctx->stream_res.stream_enc->stream_enc_inst;
-   config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-   config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - 
ENGINE_ID_DIGA;
-
-   if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY ||
-   pipe_ctx->stream->link->ep_type == 
DISPLAY_ENDPOINT_USB4_DPIA) {
-   if (pipe_ctx->stream->link->ep_type == 
DISPLAY_ENDPOINT_PHY)
-   link_enc = pipe_ctx->stream->link->link_enc;
-   else if (pipe_ctx->stream->link->ep_type == 
DISPLAY_ENDPOINT_USB4_DPIA)
-   if 
(pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign) {
-   link_enc = 
link_enc_cfg_get_link_enc_used_by_stream(
-   
pipe_ctx->stream->ctx->dc,
-   pipe_ctx->stream);
-   }
-   ASSERT(link_enc);
+   if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY)
+   link_enc = pipe_ctx->stream->link->link_enc;
+   else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA 
&&
+   
pipe_ctx->stream->link->dc->res_pool->funcs->link_encs_assign)
+   link_enc = link_enc_cfg_get_link_enc_used_by_stream(
+   pipe_ctx->stream->ctx->dc,
+   pipe_ctx->stream);
+   ASSERT(link_enc);
+   if (link_enc == NULL)
+   return;
 
-   // Initialize PHY ID with ABCDE - 01234 mapping except 
when it is B0
-   config.phy_idx = link_enc->transmitter - 
TRANSMITTER_UNIPHY_A;
+   /* otg instance */
+   config.otg_inst = (uint8_t) pipe_ctx->stream_res.tg->inst;
 
-   // Add flag to guard new A0 DIG mapping
-   if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == 
true &&
-   
pipe_ctx->stream->link->dc->ctx->dce_version == DCN_VERSION_3_1) {
-   config.dig_be = link_enc->preferred_engine;
-   config.dio_output_type = 
pipe_ctx->stream->link->ep_type;
-   config.dio_output_idx = link_enc->transmitter - 
TRANSMITTER_UNIPHY_A;
-   } else {
-   config.dio_output_t

[PATCH 5/6] drm/amd/display: introduce mpo detection flags

2021-12-22 Thread Rodrigo Siqueira
From: Mikita Lipski 

[why]
We want to know if new crtc state is enabling MPO configuration before
enabling it.
[how]
Detect if both primary and overlay planes are enabled on the same CRTC.

Reviewed-by: Bhawanpreet Lakha 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Mikita Lipski 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2432b0c0be69..c77207b02e68 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10761,6 +10761,8 @@ static int dm_update_plane_state(struct dc *dc,
 
dm_new_plane_state->dc_state = dc_new_plane_state;
 
+   dm_new_crtc_state->mpo_requested |= (plane->type == 
DRM_PLANE_TYPE_OVERLAY);
+
/* Tell DC to do a full surface update every time there
 * is a plane change. Inefficient, but works for now.
 */
@@ -10913,7 +10915,7 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
enum dc_status status;
int ret, i;
bool lock_and_validation_needed = false;
-   struct dm_crtc_state *dm_old_crtc_state;
+   struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
struct dsc_mst_fairness_vars vars[MAX_PIPES];
struct drm_dp_mst_topology_state *mst_state;
@@ -11095,6 +11097,12 @@ static int amdgpu_dm_atomic_check(struct drm_device 
*dev,
goto fail;
}
 
+   for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+   dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+   if (dm_new_crtc_state->mpo_requested)
+   DRM_DEBUG_DRIVER("MPO enablement requested on 
crtc:[%p]\n", crtc);
+   }
+
/* Check cursor planes scaling */
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
ret = dm_check_crtc_cursor(state, crtc, new_crtc_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c98e402eab0c..b9a69b0cef23 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -626,6 +626,8 @@ struct dm_crtc_state {
bool cm_has_degamma;
bool cm_is_degamma_srgb;
 
+   bool mpo_requested;
+
int update_type;
int active_planes;
 
-- 
2.25.1



[RFC v2 0/8] Define and use reset domain for GPU recovery in amdgpu

2021-12-22 Thread Andrey Grodzovsky
This patchset is based on earlier work by Boris[1] that allowed to have an
ordered workqueue at the driver level that will be used by the different
schedulers to queue their timeout work. On top of that I also serialized
any GPU reset we trigger from within amdgpu code to also go through the same
ordered wq, and in this way simplify somewhat our GPU reset code so we don't need
to protect against concurrency from multiple GPU reset triggers such as TDR on one
hand and sysfs trigger or RAS trigger on the other hand.

As advised by Christian and Daniel I defined a reset_domain struct such that
all the entities that go through reset together will be serialized one against
another. 

TDR triggered by multiple entities within the same domain due to the same 
reason will not
be triggered as the first such reset will cancel all the pending resets. This is
relevant only to TDR timers and not to triggered resets coming from RAS or 
SYSFS,
those will still happen after the in flight resets finishes.

v2:
Add handling of the SRIOV configuration: the reset notify coming from the host
and the driver already trigger a work queue to handle the reset, so drop this
intermediate wq and send directly to the timeout wq. (Shaoyun)

[1] 
https://patchwork.kernel.org/project/dri-devel/patch/20210629073510.2764391-3-boris.brezil...@collabora.com/

P.S Going through drm-misc-next and not amd-staging-drm-next as Boris work 
hasn't landed yet there.

Andrey Grodzovsky (8):
  drm/amdgpu: Introduce reset domain
  drm/amdgpu: Move scheduler init to after XGMI is ready
  drm/amdgpu: Fix crash on modprobe
  drm/amdgpu: Serialize non TDR gpu recovery with TDRs
  drm/amd/virt: For SRIOV send GPU reset directly to TDR queue.
  drm/amdgpu: Drop hive->in_reset
  drm/amdgpu: Drop concurrent GPU reset protection for device
  drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 206 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  36 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |   3 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  18 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  18 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c  |   7 +-
 10 files changed, 147 insertions(+), 164 deletions(-)

-- 
2.25.1



[RFC v2 1/8] drm/amdgpu: Introduce reset domain

2021-12-22 Thread Andrey Grodzovsky
Defined a reset_domain struct such that
all the entities that go through reset
together will be serialized one against
another. Do it for both single device and
XGMI hive cases.

Signed-off-by: Andrey Grodzovsky 
Suggested-by: Daniel Vetter 
Suggested-by: Christian König 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f017663ac50..b5ff76aae7e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -812,6 +812,11 @@ struct amd_powerplay {
 
 #define AMDGPU_RESET_MAGIC_NUM 64
 #define AMDGPU_MAX_DF_PERFMONS 4
+
+struct amdgpu_reset_domain {
+   struct workqueue_struct *wq;
+};
+
 struct amdgpu_device {
struct device   *dev;
struct pci_dev  *pdev;
@@ -1096,6 +1101,8 @@ struct amdgpu_device {
 
struct amdgpu_reset_control *reset_cntl;
uint32_t
ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
+
+   struct amdgpu_reset_domain  reset_domain;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 90d22a376632..0f3e6c078f88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2391,9 +2391,27 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (r)
goto init_failed;
 
-   if (adev->gmc.xgmi.num_physical_nodes > 1)
+   if (adev->gmc.xgmi.num_physical_nodes > 1) {
+   struct amdgpu_hive_info *hive;
+
amdgpu_xgmi_add_device(adev);
 
+   hive = amdgpu_get_xgmi_hive(adev);
+   if (!hive || !hive->reset_domain.wq) {
+   DRM_ERROR("Failed to obtain reset domain info for XGMI 
hive:%llx", hive->hive_id);
+   r = -EINVAL;
+   goto init_failed;
+   }
+
+   adev->reset_domain.wq = hive->reset_domain.wq;
+   } else {
+   adev->reset_domain.wq = 
alloc_ordered_workqueue("amdgpu-reset-dev", 0);
+   if (!adev->reset_domain.wq) {
+   r = -ENOMEM;
+   goto init_failed;
+   }
+   }
+
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 567df2db23ac..a858e3457c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -392,6 +392,14 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
goto pro_end;
}
 
+   hive->reset_domain.wq = alloc_ordered_workqueue("amdgpu-reset-hive", 0);
+   if (!hive->reset_domain.wq) {
+   dev_err(adev->dev, "XGMI: failed allocating wq for reset 
domain!\n");
+   kfree(hive);
+   hive = NULL;
+   goto pro_end;
+   }
+
hive->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
@@ -401,6 +409,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
hive->hi_req_gpu = NULL;
+
/*
 * hive pstate on boot is high in vega20 so we have to go to low
 * pstate on after boot.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index d2189bf7d428..6121aaa292cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -42,6 +42,8 @@ struct amdgpu_hive_info {
AMDGPU_XGMI_PSTATE_MAX_VEGA20,
AMDGPU_XGMI_PSTATE_UNKNOWN
} pstate;
+
+   struct amdgpu_reset_domain reset_domain;
 };
 
 struct amdgpu_pcs_ras_field {
-- 
2.25.1



[RFC v2 2/8] drm/amdgpu: Move scheduler init to after XGMI is ready

2021-12-22 Thread Andrey Grodzovsky
Before we initialize schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
 3 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0f3e6c078f88..7c063fd37389 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2284,6 +2284,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device 
*adev)
return r;
 }
 
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+   long timeout;
+   int r, i;
+
+   for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+   struct amdgpu_ring *ring = adev->rings[i];
+
+   /* No need to setup the GPU scheduler for rings that don't need 
it */
+   if (!ring || ring->no_scheduler)
+   continue;
+
+   switch (ring->funcs->type) {
+   case AMDGPU_RING_TYPE_GFX:
+   timeout = adev->gfx_timeout;
+   break;
+   case AMDGPU_RING_TYPE_COMPUTE:
+   timeout = adev->compute_timeout;
+   break;
+   case AMDGPU_RING_TYPE_SDMA:
+   timeout = adev->sdma_timeout;
+   break;
+   default:
+   timeout = adev->video_timeout;
+   break;
+   }
+
+   r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+  ring->num_hw_submission, 
amdgpu_job_hang_limit,
+  timeout, adev->reset_domain.wq, 
ring->sched_score, ring->name);
+   if (r) {
+   DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+   return r;
+   }
+   }
+
+   return 0;
+}
+
+
 /**
  * amdgpu_device_ip_init - run init for hardware IPs
  *
@@ -2412,6 +2453,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
}
 
+   r = amdgpu_device_init_schedulers(adev);
+   if (r)
+   goto init_failed;
+
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..5527c68c51de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -456,8 +456,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
  atomic_t *sched_score)
 {
struct amdgpu_device *adev = ring->adev;
-   long timeout;
-   int r;
 
if (!adev)
return -EINVAL;
@@ -477,36 +475,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring 
*ring,
spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
 GFP_KERNEL);
-   if (!ring->fence_drv.fences)
-   return -ENOMEM;
 
-   /* No need to setup the GPU scheduler for rings that don't need it */
-   if (ring->no_scheduler)
-   return 0;
+   ring->num_hw_submission = num_hw_submission;
+   ring->sched_score = sched_score;
 
-   switch (ring->funcs->type) {
-   case AMDGPU_RING_TYPE_GFX:
-   timeout = adev->gfx_timeout;
-   break;
-   case AMDGPU_RING_TYPE_COMPUTE:
-   timeout = adev->compute_timeout;
-   break;
-   case AMDGPU_RING_TYPE_SDMA:
-   timeout = adev->sdma_timeout;
-   break;
-   default:
-   timeout = adev->video_timeout;
-   break;
-   }
-
-   r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-  num_hw_submission, amdgpu_job_hang_limit,
-  timeout, NULL, sched_score, ring->name);
-   if (r) {
-   DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
-   return r;
-   }
+   if (!ring->fence_drv.fences)
+   return -ENOMEM;
 
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..a4b8279e3011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -

[RFC v2 3/8] drm/amdgpu: Fix crash on modprobe

2021-12-22 Thread Andrey Grodzovsky
Restrict job resubmission to the suspend case
only, since the schedulers are not initialised yet on
probe.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5527c68c51de..8ebd954e06c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -582,7 +582,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
 
-   if (!ring->no_scheduler) {
+   if (adev->in_suspend && !ring->no_scheduler) {
drm_sched_resubmit_jobs(&ring->sched);
drm_sched_start(&ring->sched, true);
}
-- 
2.25.1



[RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs

2021-12-22 Thread Andrey Grodzovsky
Use the reset domain wq also for non TDR gpu recovery triggers
such as sysfs and RAS. We must serialize all possible
GPU recoveries to guarantee no concurrency there.
For TDR, call the original recovery function directly since
it's already executed from within the wq. For others, just
use a wrapper to queue work and wait on it to finish.

v2: Rename to amdgpu_recover_work_struct

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  2 +-
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b5ff76aae7e0..8e96b9a14452 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct amdgpu_device 
*adev);
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+ struct amdgpu_job *job);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7c063fd37389..258ec3c0b2af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
  * Returns 0 for success or an error on failure.
  */
 
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
  struct amdgpu_job *job)
 {
struct list_head device_list, *device_list_handle =  NULL;
@@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return r;
 }
 
+struct amdgpu_recover_work_struct {
+   struct work_struct base;
+   struct amdgpu_device *adev;
+   struct amdgpu_job *job;
+   int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
+{
+   struct amdgpu_recover_work_struct *recover_work = container_of(work, 
struct amdgpu_recover_work_struct, base);
+
+   recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, 
recover_work->job);
+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+   struct amdgpu_job *job)
+{
+   struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
+
+   INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+   if (!queue_work(adev->reset_domain.wq, &work.base))
+   return -EAGAIN;
+
+   flush_work(&work.base);
+
+   return work.ret;
+}
+
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index bfc47bea23db..38c9fd7b7ad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
if (amdgpu_device_should_recover_gpu(ring->adev)) {
-   amdgpu_device_gpu_recover(ring->adev, job);
+   amdgpu_device_gpu_recover_imp(ring->adev, job);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
-- 
2.25.1



[PATCH 6/6] drm/amd/display: Add version check before using DP alt query interface

2021-12-22 Thread Rodrigo Siqueira
From: Nicholas Kazlauskas 

[Why]
To maintain compatibility with firmware older than 4.0.11.

Those firmware may have intermittent hangs with RDCSPIPE or the PHY,
but we shouldn't regress their previous behavior.

[How]
Use the new path if firmware is development or 4.0.11 or newer. Use the
legacy path otherwise.

Fixes: b60a041393f7 ("drm/amd/display: Query DMCUB for dp alt status")

Reviewed-by: Hansen Dsouza 
Acked-by: Rodrigo Siqueira 
Signed-off-by: Nicholas Kazlauskas 
---
 .../display/dc/dcn31/dcn31_dio_link_encoder.c | 114 +++---
 1 file changed, 94 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index 71c359f9cdd2..8b9b1a5309ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -100,6 +100,35 @@ static uint8_t phy_id_from_transmitter(enum transmitter t)
return phy_id;
 }
 
+static bool has_query_dp_alt(struct link_encoder *enc)
+{
+   struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+
+   /* Supports development firmware and firmware >= 4.0.11 */
+   return dc_dmub_srv &&
+  !(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+dc_dmub_srv->dmub->fw_version <= DMUB_FW_VERSION(4, 0, 10));
+}
+
+static bool query_dp_alt_from_dmub(struct link_encoder *enc,
+  union dmub_rb_cmd *cmd)
+{
+   struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+   struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+
+   memset(cmd, 0, sizeof(*cmd));
+   cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS;
+   cmd->query_dp_alt.header.sub_type =
+   DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
+   cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
+   cmd->query_dp_alt.data.phy_id = 
phy_id_from_transmitter(enc10->base.transmitter);
+
+   if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, cmd))
+   return false;
+
+   return true;
+}
+
 void dcn31_link_encoder_set_dio_phy_mux(
struct link_encoder *enc,
enum encoder_type_select sel,
@@ -569,45 +598,90 @@ void dcn31_link_encoder_disable_output(
 bool dcn31_link_encoder_is_in_alt_mode(struct link_encoder *enc)
 {
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
-   struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
union dmub_rb_cmd cmd;
-   bool is_usb_c_alt_mode = false;
+   uint32_t dp_alt_mode_disable;
 
-   if (enc->features.flags.bits.DP_IS_USB_C && dc_dmub_srv) {
-   memset(&cmd, 0, sizeof(cmd));
-   cmd.query_dp_alt.header.type = DMUB_CMD__VBIOS;
-   cmd.query_dp_alt.header.sub_type = 
DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
-   cmd.query_dp_alt.header.payload_bytes = 
sizeof(cmd.panel_cntl.data);
-   cmd.query_dp_alt.data.phy_id = 
phy_id_from_transmitter(enc10->base.transmitter);
+   /* Only applicable to USB-C PHY. */
+   if (!enc->features.flags.bits.DP_IS_USB_C)
+   return false;
 
-   if (!dc_dmub_srv_cmd_with_reply_data(dc_dmub_srv, &cmd))
+   /*
+* Use the new interface from DMCUB if available.
+* Avoids hanging the RDCPSPIPE if DMCUB wasn't already running.
+*/
+   if (has_query_dp_alt(enc)) {
+   if (!query_dp_alt_from_dmub(enc, &cmd))
return false;
 
-   is_usb_c_alt_mode = (cmd.query_dp_alt.data.is_dp_alt_disable == 
0);
+   return (cmd.query_dp_alt.data.is_dp_alt_disable == 0);
}
 
-   return is_usb_c_alt_mode;
+   /* Legacy path, avoid if possible. */
+   if (enc->ctx->asic_id.hw_internal_rev != YELLOW_CARP_B0) {
+   REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+   &dp_alt_mode_disable);
+   } else {
+   /*
+* B0 phys use a new set of registers to check whether alt mode 
is disabled.
+* if value == 1 alt mode is disabled, otherwise it is enabled.
+*/
+   if ((enc10->base.transmitter == TRANSMITTER_UNIPHY_A) ||
+   (enc10->base.transmitter == TRANSMITTER_UNIPHY_B) ||
+   (enc10->base.transmitter == TRANSMITTER_UNIPHY_E)) {
+   REG_GET(RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+   &dp_alt_mode_disable);
+   } else {
+   REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE,
+   &dp_alt_mode_disable);
+   }
+   }
+
+   return (dp_alt_mode_disable == 0);
 }
 
 void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct 
dc_link_settings *link_settings)
 {
struct dcn10_link_encoder *enc10 = TO_DCN10_

RE: [PATCH 0/6] DC Patches December 22, 2021

2021-12-22 Thread Wheeler, Daniel
[AMD Official Use Only]

Hi all,
 
This week this patchset was tested on the following systems:
 
Lenovo Thinkpad T14s Gen2 with AMD Ryzen 5 5650U, with the following display 
types: eDP 1080p 60hz, 4k 60hz  (via USB-C to DP/HDMI), 1440p 144hz (via USB-C 
to DP/HDMI), 1680*1050 60hz (via USB-C to DP and then DP to DVI/VGA)
 
Sapphire Pulse RX5700XT with the following display types:
4k 60hz  (via DP/HDMI), 1440p 144hz (via DP/HDMI), 1680*1050 60hz (via DP to 
DVI/VGA)
 
Reference AMD RX6800 with the following display types:
4k 60hz  (via DP/HDMI and USB-C to DP/HDMI), 1440p 144hz (via USB-C to DP/HDMI 
and USB-C to DP/HDMI), 1680*1050 60hz (via DP to DVI/VGA)
 
Included testing using a Startech DP 1.4 MST hub at 2x 4k 60hz, and 3x 1080p 
60hz on all systems. Also tested DSC via USB-C to DP DSC Hub with 3x 4k 60hz on 
Ryzen 9 5900h and Ryzen 5 4500u.
 
Tested on Ubuntu 20.04.3 with Kernel Version 5.13
 
Tested-by: Daniel Wheeler 
 
 
Thank you,
 
Dan Wheeler
Technologist  |  AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
Facebook |  Twitter |  amd.com  

-Original Message-
From: Siqueira, Rodrigo  
Sent: December 22, 2021 5:05 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Li, Sun peng (Leo) 
; Lakha, Bhawanpreet ; Siqueira, 
Rodrigo ; Pillai, Aurabindo 
; Zhuo, Qingqing (Lillian) ; 
Lipski, Mikita ; Li, Roman ; 
anson.ja...@amd.com; Lin, Wayne ; Wang, Chao-kai (Stylon) 
; Chiu, Solomon ; Kotarac, Pavle 
; Gutierrez, Agustin ; 
Wheeler, Daniel 
Subject: [PATCH 0/6] DC Patches December 22, 2021

Hi,

This is the last DC upstream of this year. As a result, it is a very tiny one 
with a few bug fixes.

Just for curiosity, I decided to calculate how many patches we upstream via 
this weekly process in 2021, and it was approximately 740 patches where Daniel 
Wheeler tested each patchset. Thanks to everybody for helping with this 
process, and special thanks to Daniel that validate each patchset :)

Cc: Daniel Wheeler 

Best Regards
Siqueira

Charlene Liu (1):
  drm/amd/display: Add check for forced_clocks debug option

Mikita Lipski (1):
  drm/amd/display: introduce mpo detection flags

Nicholas Kazlauskas (2):
  drm/amd/display: Don't reinitialize DMCUB on s0ix resume
  drm/amd/display: Add version check before using DP alt query interface

Wenjing Liu (1):
  drm/amd/display: unhard code link to phy idx mapping in dc link and
clean up

Yi-Ling Chen (1):
  drm/amd/display: Fix underflow for fused display pipes case

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  40 -
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |   2 +
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 145 +++---  
.../gpu/drm/amd/display/dc/core/dc_resource.c |  33 
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |   7 +-
 .../display/dc/dcn31/dcn31_dio_link_encoder.c | 114 +++---
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |   2 +-
 drivers/gpu/drm/amd/display/dc/dm_cp_psp.h|   4 +-
 drivers/gpu/drm/amd/display/dc/inc/resource.h |   1 +
 .../drm/amd/display/modules/inc/mod_hdcp.h|   2 +-
 10 files changed, 234 insertions(+), 116 deletions(-)

--
2.25.1


[RFC v2 5/8] drm/amd/virt: For SRIOV send GPU reset directly to TDR queue.

2021-12-22 Thread Andrey Grodzovsky
No need to trigger another work queue inside the work queue.

Suggested-by: Liu Shaoyun 
Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 7 +--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 7 +--
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 7 +--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 23b066bcffb2..487cd654b69e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -276,7 +276,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
-   amdgpu_device_gpu_recover(adev, NULL);
+   amdgpu_device_gpu_recover_imp(adev, NULL);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -302,7 +302,10 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device 
*adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev))
-   schedule_work(&adev->virt.flr_work);
+   WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __FUNCTION__ );
break;
case IDH_QUERY_ALIVE:
xgpu_ai_mailbox_send_ack(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index a35e6d87e537..e3869067a31d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -308,7 +308,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
-   amdgpu_device_gpu_recover(adev, NULL);
+   amdgpu_device_gpu_recover_imp(adev, NULL);
 }
 
 static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -337,7 +337,10 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device 
*adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev))
-   schedule_work(&adev->virt.flr_work);
+   WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __FUNCTION__ );
break;
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can 
ignore
 * it byfar since that polling thread will handle it,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index aef9d059ae52..23e802cae2bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct 
*work)
 
/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev))
-   amdgpu_device_gpu_recover(adev, NULL);
+   amdgpu_device_gpu_recover_imp(adev, NULL);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -551,7 +551,10 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device 
*adev,
 
/* only handle FLR_NOTIFY now */
if (!r)
-   schedule_work(&adev->virt.flr_work);
+   WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __FUNCTION__ );
}
 
return 0;
-- 
2.25.1



[RFC v2 6/8] drm/amdgpu: Drop hive->in_reset

2021-12-22 Thread Andrey Grodzovsky
Since we serialize all resets no need to protect from concurrent
resets.

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  1 -
 3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 258ec3c0b2af..107a393ebbfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5013,25 +5013,9 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
dev_info(adev->dev, "GPU %s begin!\n",
need_emergency_restart ? "jobs stop":"reset");
 
-   /*
-* Here we trylock to avoid chain of resets executing from
-* either trigger by jobs on different adevs in XGMI hive or jobs on
-* different schedulers for same device while this TO handler is 
running.
-* We always reset all schedulers for device and all devices for XGMI
-* hive so that should take care of them too.
-*/
hive = amdgpu_get_xgmi_hive(adev);
-   if (hive) {
-   if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-   DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
-   job ? job->base.id : -1, hive->hive_id);
-   amdgpu_put_xgmi_hive(hive);
-   if (job && job->vm)
-   drm_sched_increase_karma(&job->base);
-   return 0;
-   }
+   if (hive)
mutex_lock(&hive->hive_lock);
-   }
 
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
@@ -5227,7 +5211,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
 
 skip_recovery:
if (hive) {
-   atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a858e3457c5c..9ad742039ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -404,7 +404,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
mutex_init(&hive->hive_lock);
-   atomic_set(&hive->in_reset, 0);
atomic_set(&hive->number_devices, 0);
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6121aaa292cb..2f2ce53645a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -33,7 +33,6 @@ struct amdgpu_hive_info {
struct list_head node;
atomic_t number_devices;
struct mutex hive_lock;
-   atomic_t in_reset;
int hi_req_count;
struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;
-- 
2.25.1



[RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2021-12-22 Thread Andrey Grodzovsky
Since flr work is now serialized against GPU resets,
there is no need for this.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ---
 2 files changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 487cd654b69e..7d59a66e3988 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -248,15 +248,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, 
virt);
int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
 
-   /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
-* otherwise the mailbox msg will be ruined/reseted by
-* the VF FLR.
-*/
-   if (!down_write_trylock(&adev->reset_sem))
-   return;
-
amdgpu_virt_fini_data_exchange(adev);
-   atomic_set(&adev->in_gpu_reset, 1);
 
xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
@@ -269,9 +261,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
} while (timeout > 1);
 
 flr_done:
-   atomic_set(&adev->in_gpu_reset, 0);
-   up_write(&adev->reset_sem);
-
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e3869067a31d..f82c066c8e8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -277,15 +277,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, 
virt);
int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
 
-   /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
-* otherwise the mailbox msg will be ruined/reseted by
-* the VF FLR.
-*/
-   if (!down_write_trylock(&adev->reset_sem))
-   return;
-
amdgpu_virt_fini_data_exchange(adev);
-   atomic_set(&adev->in_gpu_reset, 1);
 
xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
 
@@ -298,9 +290,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
} while (timeout > 1);
 
 flr_done:
-   atomic_set(&adev->in_gpu_reset, 0);
-   up_write(&adev->reset_sem);
-
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
-- 
2.25.1



[RFC v2 7/8] drm/amdgpu: Drop concurrent GPU reset protection for device

2021-12-22 Thread Andrey Grodzovsky
Since all GPU resets are now serialized, there is no need for this.

This patch also reverts 'drm/amdgpu: race issue when jobs on 2 ring timeout'

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 89 ++
 1 file changed, 7 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 107a393ebbfd..fef952ca8db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4763,11 +4763,10 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
return r;
 }
 
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
+static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
 {
-   if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
-   return false;
+   atomic_set(&adev->in_gpu_reset, 1);
 
if (hive) {
down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
@@ -4786,8 +4785,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device 
*adev,
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
-
-   return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -4798,46 +4795,6 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
up_write(&adev->reset_sem);
 }
 
-/*
- * to lockup a list of amdgpu devices in a hive safely, if not a hive
- * with multiple nodes, it will be similar as amdgpu_device_lock_adev.
- *
- * unlock won't require roll back.
- */
-static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct 
amdgpu_hive_info *hive)
-{
-   struct amdgpu_device *tmp_adev = NULL;
-
-   if (adev->gmc.xgmi.num_physical_nodes > 1) {
-   if (!hive) {
-   dev_err(adev->dev, "Hive is NULL while device has 
multiple xgmi nodes");
-   return -ENODEV;
-   }
-   list_for_each_entry(tmp_adev, &hive->device_list, 
gmc.xgmi.head) {
-   if (!amdgpu_device_lock_adev(tmp_adev, hive))
-   goto roll_back;
-   }
-   } else if (!amdgpu_device_lock_adev(adev, hive))
-   return -EAGAIN;
-
-   return 0;
-roll_back:
-   if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
-   /*
-* if the lockup iteration break in the middle of a hive,
-* it may means there may has a race issue,
-* or a hive device locked up independently.
-* we may be in trouble and may not, so will try to roll back
-* the lock and give out a warnning.
-*/
-   dev_warn(tmp_adev->dev, "Hive lock iteration broke in the 
middle. Rolling back to unlock");
-   list_for_each_entry_continue_reverse(tmp_adev, 
&hive->device_list, gmc.xgmi.head) {
-   amdgpu_device_unlock_adev(tmp_adev);
-   }
-   }
-   return -EAGAIN;
-}
-
 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
 {
struct pci_dev *p = NULL;
@@ -5023,22 +4980,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
reset_context.hive = hive;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
-   /*
-* lock the device before we try to operate the linked list
-* if didn't get the device lock, don't touch the linked list since
-* others may iterating it.
-*/
-   r = amdgpu_device_lock_hive_adev(adev, hive);
-   if (r) {
-   dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another 
already in progress",
-   job ? job->base.id : -1);
-
-   /* even we skipped this reset, still need to set the job to 
guilty */
-   if (job && job->vm)
-   drm_sched_increase_karma(&job->base);
-   goto skip_recovery;
-   }
-
/*
 * Build list of devices to reset.
 * In case we are in XGMI hive mode, resort the device list
@@ -5058,6 +4999,9 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
 
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+
+   amdgpu_device_lock_adev(tmp_adev, hive);
+
/*
 * Try to put the audio codec into suspend state
 * before gpu reset started.
@@ -5209,13 +5153,12 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
amdgpu_device_unlock_adev(tmp_adev);
}
 
-skip_recovery:
if (hive) {
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
  

[Patch v4 00/24] CHECKPOINT RESTORE WITH ROCm

2021-12-22 Thread Rajneesh Bhardwaj
CRIU is a user space tool which is very popular for container live
migration in datacentres. It can checkpoint a running application, save
its complete state, memory contents and all system resources to images
on disk which can be migrated to another machine and restored later.
More information on CRIU can be found at https://criu.org/Main_Page

CRIU currently does not support Checkpoint / Restore with applications
that have devices files open so it cannot perform checkpoint and restore
on GPU devices which are very complex and have their own VRAM managed
privately. CRIU, however can support external devices by using a plugin
architecture. We feel that we are getting close to finalizing our IOCTL
APIs which were again changed since V3 for an improved modular design.

Our changes to CRIU user space can be obtained from here:
https://github.com/RadeonOpenCompute/criu/tree/amdgpu_rfc-211222

We have tested the following scenarios:
 - Checkpoint / Restore of a Pytorch (BERT) workload
 - kfdtests with queues and events
 - Gfx9 and Gfx10 based multi GPU test systems 
 - On baremetal and inside a docker container
 - Restoring on a different system

V1: Initial
V2: Addressed review comments
V3: Rebased on latest amd-staging-drm-next (5.15 based)
v4: New API design and basic support for SVM, however there is an
outstanding issue with SVM restore which is currently under debug and
hopefully that won't impact the ioctl APIs as SVMs are treated as
private data hidden from user space like queues and events with the new
approach.


David Yat Sin (9):
  drm/amdkfd: CRIU Implement KFD unpause operation
  drm/amdkfd: CRIU add queues support
  drm/amdkfd: CRIU restore queue ids
  drm/amdkfd: CRIU restore sdma id for queues
  drm/amdkfd: CRIU restore queue doorbell id
  drm/amdkfd: CRIU checkpoint and restore queue mqds
  drm/amdkfd: CRIU checkpoint and restore queue control stack
  drm/amdkfd: CRIU checkpoint and restore events
  drm/amdkfd: CRIU implement gpu_id remapping

Rajneesh Bhardwaj (15):
  x86/configs: CRIU update debug rock defconfig
  x86/configs: Add rock-rel_defconfig for amd-feature-criu branch
  drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs
  drm/amdkfd: CRIU Implement KFD process_info ioctl
  drm/amdkfd: CRIU Implement KFD checkpoint ioctl
  drm/amdkfd: CRIU Implement KFD restore ioctl
  drm/amdkfd: CRIU Implement KFD resume ioctl
  drm/amdkfd: CRIU export BOs as prime dmabuf objects
  drm/amdkfd: CRIU checkpoint and restore xnack mode
  drm/amdkfd: CRIU allow external mm for svm ranges
  drm/amdkfd: use user_gpu_id for svm ranges
  drm/amdkfd: CRIU Discover svm ranges
  drm/amdkfd: CRIU Save Shared Virtual Memory ranges
  drm/amdkfd: CRIU prepare for svm resume
  drm/amdkfd: CRIU resume shared virtual memory ranges

 arch/x86/configs/rock-dbg_defconfig   |   53 +-
 arch/x86/configs/rock-rel_defconfig   | 4927 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|6 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   51 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   20 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |2 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 1453 -
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  185 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   18 +-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  313 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |   14 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |   72 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |   74 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   89 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |   81 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  166 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   86 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  377 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  326 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |   39 +
 include/uapi/linux/kfd_ioctl.h|   79 +-
 22 files changed, 8099 insertions(+), 334 deletions(-)
 create mode 100644 arch/x86/configs/rock-rel_defconfig

-- 
2.17.1



[Patch v4 01/24] x86/configs: CRIU update debug rock defconfig

2021-12-22 Thread Rajneesh Bhardwaj
 - Update debug config for Checkpoint-Restore (CR) support
 - Also include necessary options for CR with docker containers.

Signed-off-by: Rajneesh Bhardwaj 
---
 arch/x86/configs/rock-dbg_defconfig | 53 ++---
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/arch/x86/configs/rock-dbg_defconfig 
b/arch/x86/configs/rock-dbg_defconfig
index 4877da183599..bc2a34666c1d 100644
--- a/arch/x86/configs/rock-dbg_defconfig
+++ b/arch/x86/configs/rock-dbg_defconfig
@@ -249,6 +249,7 @@ CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
 CONFIG_KALLSYMS_BASE_RELATIVE=y
 # CONFIG_USERFAULTFD is not set
+CONFIG_USERFAULTFD=y
 CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y
 CONFIG_KCMP=y
 CONFIG_RSEQ=y
@@ -1015,6 +1016,11 @@ CONFIG_PACKET_DIAG=y
 CONFIG_UNIX=y
 CONFIG_UNIX_SCM=y
 CONFIG_UNIX_DIAG=y
+CONFIG_SMC_DIAG=y
+CONFIG_XDP_SOCKETS_DIAG=y
+CONFIG_INET_MPTCP_DIAG=y
+CONFIG_TIPC_DIAG=y
+CONFIG_VSOCKETS_DIAG=y
 # CONFIG_TLS is not set
 CONFIG_XFRM=y
 CONFIG_XFRM_ALGO=y
@@ -1052,15 +1058,17 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_NET_IPVTI is not set
 # CONFIG_NET_FOU is not set
 # CONFIG_NET_FOU_IP_TUNNELS is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-CONFIG_INET_TUNNEL=y
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_INET_UDP_DIAG is not set
-# CONFIG_INET_RAW_DIAG is not set
-# CONFIG_INET_DIAG_DESTROY is not set
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_ESP_OFFLOAD=m
+CONFIG_INET_TUNNEL=m
+CONFIG_INET_XFRM_TUNNEL=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_TCP_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_INET_RAW_DIAG=m
+CONFIG_INET_DIAG_DESTROY=y
 CONFIG_TCP_CONG_ADVANCED=y
 # CONFIG_TCP_CONG_BIC is not set
 CONFIG_TCP_CONG_CUBIC=y
@@ -1085,12 +1093,14 @@ CONFIG_TCP_MD5SIG=y
 CONFIG_IPV6=y
 # CONFIG_IPV6_ROUTER_PREF is not set
 # CONFIG_IPV6_OPTIMISTIC_DAD is not set
-CONFIG_INET6_AH=y
-CONFIG_INET6_ESP=y
-# CONFIG_INET6_ESP_OFFLOAD is not set
-# CONFIG_INET6_ESPINTCP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_IPV6_MIP6 is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_ESP_OFFLOAD=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_TUNNEL=m
+CONFIG_INET_DCCP_DIAG=m
+CONFIG_INET_SCTP_DIAG=m
 # CONFIG_IPV6_ILA is not set
 # CONFIG_IPV6_VTI is not set
 CONFIG_IPV6_SIT=y
@@ -1146,8 +1156,13 @@ CONFIG_NF_CT_PROTO_UDPLITE=y
 # CONFIG_NF_CONNTRACK_SANE is not set
 # CONFIG_NF_CONNTRACK_SIP is not set
 # CONFIG_NF_CONNTRACK_TFTP is not set
-# CONFIG_NF_CT_NETLINK is not set
-# CONFIG_NF_CT_NETLINK_TIMEOUT is not set
+CONFIG_COMPAT_NETLINK_MESSAGES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
+CONFIG_SCSI_NETLINK=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_NF_NAT=m
 CONFIG_NF_NAT_REDIRECT=y
 CONFIG_NF_NAT_MASQUERADE=y
@@ -1992,7 +2007,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y
 CONFIG_NETPOLL=y
 CONFIG_NET_POLL_CONTROLLER=y
 # CONFIG_RIONET is not set
-# CONFIG_TUN is not set
+CONFIG_TUN=y
 # CONFIG_TUN_VNET_CROSS_LE is not set
 CONFIG_VETH=y
 # CONFIG_NLMON is not set
@@ -3990,7 +4005,7 @@ CONFIG_MANDATORY_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY_USER=y
-# CONFIG_FANOTIFY is not set
+CONFIG_FANOTIFY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-- 
2.17.1



[Patch v4 07/24] drm/amdkfd: CRIU Implement KFD resume ioctl

2021-12-22 Thread Rajneesh Bhardwaj
This adds support to create userptr BOs on restore and introduces a new
ioctl to restart memory notifiers for the restored userptr BOs.
When doing a CRIU restore, MMU notifications can happen anytime after we call
amdgpu_mn_register. Prevent MMU notifications until we reach stage-4 of the
restore process, i.e. the criu_resume ioctl is received and the process is
ready to be resumed. This ioctl is different from other KFD CRIU ioctls
since it is called by the CRIU master restore process for all the target
processes being resumed by CRIU.

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  6 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 51 +--
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 44 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 35 +++--
 5 files changed, 123 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fcbc8a9c9e06..5c5fc839f701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -131,6 +131,7 @@ struct amdkfd_process_info {
atomic_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
+   bool block_mmu_notifications;
 };
 
 int amdgpu_amdkfd_init(void);
@@ -269,7 +270,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void 
*drm_priv);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
-   uint64_t *offset, uint32_t flags);
+   uint64_t *offset, uint32_t flags, bool criu_resume);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
@@ -297,6 +298,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device 
*adev,
 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+
 #if IS_ENABLED(CONFIG_HSA_AMD)
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 90b985436878..5679fb75ec88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -846,7 +846,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem 
*mem,
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+  bool criu_resume)
 {
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -868,6 +869,17 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t 
user_addr)
goto out;
}
 
+   if (criu_resume) {
+   /*
+* During a CRIU restore operation, the userptr buffer objects
+* will be validated in the restore_userptr_work worker at a
+* later stage when it is scheduled by another ioctl called by
+* CRIU master process for the target pid for restore.
+*/
+   atomic_inc(&mem->invalid);
+   mutex_unlock(&process_info->lock);
+   return 0;
+   }
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
@@ -1240,6 +1252,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
**process_info,
INIT_DELAYED_WORK(&info->restore_userptr_work,
  amdgpu_amdkfd_restore_userptr_worker);
 
+   info->block_mmu_notifications = false;
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -1456,10 +1469,37 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void 
*drm_priv)
return avm->pd_phys_addr;
 }
 
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+   struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+   pinfo->block_mmu_notifications = true;
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+   int ret = 0;
+   struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+   mutex_lock(&pinfo->lock);
+   pr_debug("scheduling work\n");
+   atomic_inc(&pinfo->evicted_bos);
+   if (!p

[Patch v4 03/24] drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

2021-12-22 Thread Rajneesh Bhardwaj
Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on the same or a remote
machine, but it expects the processes that have a device file (e.g. GPU)
associated with them to provide the necessary driver support to assist
CRIU and its extensible plugin interface. Thus, in order to support the
Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute
Kernel driver needs to provide a set of new APIs that provide the
necessary VRAM metadata and its contents to a userspace component
(CRIU plugin) that can store it in the form of image files.

This introduces some new ioctls which will be used to checkpoint-restore
any KFD-bound user process. KFD doesn't allow any arbitrary ioctl call
unless it is called by the group leader process. Since these ioctls are
expected to be called from a KFD CRIU plugin, which has elevated ptrace
privileges and CAP_CHECKPOINT_RESTORE capabilities attached to its file
descriptors, modify KFD to allow such calls.

(API redesigned by David Yat Sin)
Suggested-by: Felix Kuehling 
Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 94 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 65 +++-
 include/uapi/linux/kfd_ioctl.h   | 79 +++-
 3 files changed, 235 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4bfc0c8ab764..1b863bd84c96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "kfd_priv.h"
@@ -1856,6 +1857,75 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 }
 #endif
 
+static int criu_checkpoint(struct file *filep,
+  struct kfd_process *p,
+  struct kfd_ioctl_criu_args *args)
+{
+   return 0;
+}
+
+static int criu_restore(struct file *filep,
+   struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args)
+{
+   return 0;
+}
+
+static int criu_unpause(struct file *filep,
+   struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args)
+{
+   return 0;
+}
+
+static int criu_resume(struct file *filep,
+   struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args)
+{
+   return 0;
+}
+
+static int criu_process_info(struct file *filep,
+   struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args)
+{
+   return 0;
+}
+
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void 
*data)
+{
+   struct kfd_ioctl_criu_args *args = data;
+   int ret;
+
+   dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+   switch (args->op) {
+   case KFD_CRIU_OP_PROCESS_INFO:
+   ret = criu_process_info(filep, p, args);
+   break;
+   case KFD_CRIU_OP_CHECKPOINT:
+   ret = criu_checkpoint(filep, p, args);
+   break;
+   case KFD_CRIU_OP_UNPAUSE:
+   ret = criu_unpause(filep, p, args);
+   break;
+   case KFD_CRIU_OP_RESTORE:
+   ret = criu_restore(filep, p, args);
+   break;
+   case KFD_CRIU_OP_RESUME:
+   ret = criu_resume(filep, p, args);
+   break;
+   default:
+   dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", 
args->op);
+   ret = -EINVAL;
+   break;
+   }
+
+   if (ret)
+   dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, 
ret);
+
+   return ret;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1959,6 +2029,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+   kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
@@ -1973,6 +2046,7 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
+   bool ptrace_attached = false;
 
if (nr >= AMDKFD_CORE_IOCTL_COUNT)
goto err_i1;
@@ -1998,7 +2072,15 @@ static long kfd_ioctl(struct file *filep, unsigned int 
cmd, unsigned long arg)
 * processes need to create their own KFD device context.
 */
process = filep->private_data;
-   if (process->lead_thread != curren

[Patch v4 10/24] drm/amdkfd: CRIU restore queue ids

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

When re-creating queues during CRIU restore, restore the queue with the
same queue id value used during CRIU dump.

Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: David Yat Sin 

---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 37 +++
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9665c8657929..3fb155f756fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -312,7 +312,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
p->pasid,
dev->id);
 
-   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 1e30717b5253..0c50e67e2b51 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
 
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-   &properties, &qid, NULL);
+   &properties, &qid, NULL, NULL);
 
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c2679a23aa3..8272bd5c4600 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -461,6 +461,7 @@ enum KFD_QUEUE_PRIORITY {
  * it's user mode or kernel mode queue.
  *
  */
+
 struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -1156,6 +1157,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+   const struct kfd_criu_queue_priv_data *q_data,
uint32_t *p_doorbell_offset_in_process);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned 
int qid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 480ad794df4e..275aeebc58fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -42,6 +42,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
 }
 
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+   unsigned int qid)
+{
+   if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+   return -EINVAL;
+
+   if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+   pr_err("Cannot create new queue because requested qid(%u) is in 
use\n", qid);
+   return -ENOSPC;
+   }
+
+   return 0;
+}
+
 static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
 {
@@ -194,6 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+   const struct kfd_criu_queue_priv_data *q_data,
uint32_t *p_doorbell_offset_in_process)
 {
int retval;
@@ -225,7 +240,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
 
-   retval = find_available_queue_slot(pqm, qid);
+   if (q_data) {
+   retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+   *qid = q_data->q_id;
+   } else
+   retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
 
@@ -528,7 +548,7 @@ int kfd_process_get_queue_info(struct kfd_process *p,
return 0;
 }
 
-static void criu_dump_queue(struct kfd_process_device *pdd,
+static void criu_checkpoint_queue(struct kfd_process_device *pdd,
   struct queue *q,
   struct kfd_criu_queue_priv_data *q_data)
 {
@@ -560,7 +580,7 @@ static void criu_dump_queue(struct kfd_proces

[Patch v4 09/24] drm/amdkfd: CRIU add queues support

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

Add support to the existing CRIU ioctls to save the number of queues and
queue properties for each queue during checkpoint, and re-create the
queues on restore.

Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 110 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  43 +++-
 .../amd/amdkfd/kfd_process_queue_manager.c| 212 ++
 3 files changed, 357 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index db2bb302a8d4..9665c8657929 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2006,19 +2006,36 @@ static int criu_checkpoint_bos(struct kfd_process *p,
return ret;
 }
 
-static void criu_get_process_object_info(struct kfd_process *p,
-uint32_t *num_bos,
-uint64_t *objs_priv_size)
+static int criu_get_process_object_info(struct kfd_process *p,
+   uint32_t *num_bos,
+   uint32_t *num_objects,
+   uint64_t *objs_priv_size)
 {
+   int ret;
uint64_t priv_size;
+   uint32_t num_queues, num_events, num_svm_ranges;
+   uint64_t queues_priv_data_size;
 
*num_bos = get_process_num_bos(p);
 
+   ret = kfd_process_get_queue_info(p, &num_queues, 
&queues_priv_data_size);
+   if (ret)
+   return ret;
+
+   num_events = 0; /* TODO: Implement Events */
+   num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */
+
+   *num_objects = num_queues + num_events + num_svm_ranges;
+
if (objs_priv_size) {
priv_size = sizeof(struct kfd_criu_process_priv_data);
priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+   priv_size += queues_priv_data_size;
+   /* TODO: Add Events priv size */
+   /* TODO: Add SVM ranges priv size */
*objs_priv_size = priv_size;
}
+   return 0;
 }
 
 static int criu_checkpoint(struct file *filep,
@@ -2026,7 +2043,7 @@ static int criu_checkpoint(struct file *filep,
   struct kfd_ioctl_criu_args *args)
 {
int ret;
-   uint32_t num_bos;
+   uint32_t num_bos, num_objects;
uint64_t priv_size, priv_offset = 0;
 
if (!args->bos || !args->priv_data)
@@ -2048,9 +2065,12 @@ static int criu_checkpoint(struct file *filep,
goto exit_unlock;
}
 
-   criu_get_process_object_info(p, &num_bos, &priv_size);
+   ret = criu_get_process_object_info(p, &num_bos, &num_objects, 
&priv_size);
+   if (ret)
+   goto exit_unlock;
 
if (num_bos != args->num_bos ||
+   num_objects != args->num_objects ||
priv_size != args->priv_data_size) {
 
ret = -EINVAL;
@@ -2067,6 +2087,17 @@ static int criu_checkpoint(struct file *filep,
if (ret)
goto exit_unlock;
 
+   if (num_objects) {
+   ret = kfd_criu_checkpoint_queues(p, (uint8_t __user 
*)args->priv_data,
+&priv_offset);
+   if (ret)
+   goto exit_unlock;
+
+   /* TODO: Dump Events */
+
+   /* TODO: Dump SVM-Ranges */
+   }
+
 exit_unlock:
mutex_unlock(&p->mutex);
if (ret)
@@ -2340,6 +2371,62 @@ static int criu_restore_bos(struct kfd_process *p,
return ret;
 }
 
+static int criu_restore_objects(struct file *filep,
+   struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args,
+   uint64_t *priv_offset,
+   uint64_t max_priv_data_size)
+{
+   int ret = 0;
+   uint32_t i;
+
+   BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+   BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+   BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, 
object_type));
+
+   for (i = 0; i < args->num_objects; i++) {
+   uint32_t object_type;
+
+   if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+   pr_err("Invalid private data size\n");
+   return -EINVAL;
+   }
+
+   ret = get_user(object_type, (uint32_t __user *)(args->priv_data 
+ *priv_offset));
+   if (ret) {
+   pr_err("Failed to copy private information from 
user\n");
+   goto exit;
+   }
+
+   switch (object_type) {
+   case KFD_CRIU_OBJECT_TYPE_QUEUE:
+   ret = kfd_criu_restore_queue(p, (uint8_t __user 
*)args->priv_data,
+priv_offse

[Patch v4 04/24] drm/amdkfd: CRIU Implement KFD process_info ioctl

2021-12-22 Thread Rajneesh Bhardwaj
This IOCTL is expected to be called as a precursor to the actual
Checkpoint operation. This does the basic discovery into the target
process seized by CRIU and relays the information to the userspace that
utilizes it to start the Checkpoint operation via another dedicated
IOCTL.

The process_info IOCTL determines the number of GPUs and buffer objects
that are associated with the target process, and its process id in the
caller's namespace, since the /proc/pid/mem interface may be used to
drain the contents of the discovered buffer objects in userspace and
getpid returns the pid of the CRIU dumper process. Also, the pid of a
process inside a container might be different from its global pid, so
return the ns pid.

Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 55 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1b863bd84c96..53d7a20e3c06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1857,6 +1857,41 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 }
 #endif
 
+uint64_t get_process_num_bos(struct kfd_process *p)
+{
+   uint64_t num_of_bos = 0, i;
+
+   /* Run over all PDDs of the process */
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_process_device *pdd = p->pdds[i];
+   void *mem;
+   int id;
+
+   idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+   struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+   if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+   num_of_bos++;
+   }
+   }
+   return num_of_bos;
+}
+
+static void criu_get_process_object_info(struct kfd_process *p,
+uint32_t *num_bos,
+uint64_t *objs_priv_size)
+{
+   uint64_t priv_size;
+
+   *num_bos = get_process_num_bos(p);
+
+   if (objs_priv_size) {
+   priv_size = sizeof(struct kfd_criu_process_priv_data);
+   priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+   *objs_priv_size = priv_size;
+   }
+}
+
 static int criu_checkpoint(struct file *filep,
   struct kfd_process *p,
   struct kfd_ioctl_criu_args *args)
@@ -1889,7 +1924,25 @@ static int criu_process_info(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
 {
-   return 0;
+   int ret = 0;
+
+   mutex_lock(&p->mutex);
+
+   if (!kfd_has_process_device_data(p)) {
+   pr_err("No pdd for given process\n");
+   ret = -ENODEV;
+   goto err_unlock;
+   }
+
+   args->pid = task_pid_nr_ns(p->lead_thread,
+   task_active_pid_ns(p->lead_thread));
+
+   criu_get_process_object_info(p, &args->num_bos, &args->priv_data_size);
+
+   dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos);
+err_unlock:
+   mutex_unlock(&p->mutex);
+   return ret;
 }
 
 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void 
*data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e68f692362bb..4d9bc7af03af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -950,6 +950,8 @@ void *kfd_process_device_translate_handle(struct 
kfd_process_device *p,
 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
 
+bool kfd_has_process_device_data(struct kfd_process *p);
+
 /* PASIDs */
 int kfd_pasid_init(void);
 void kfd_pasid_exit(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4c8a6948a9f..f77d556ca0fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1456,6 +1456,20 @@ static int init_doorbell_bitmap(struct 
qcm_process_device *qpd,
return 0;
 }
 
+bool kfd_has_process_device_data(struct kfd_process *p)
+{
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_process_device *pdd = p->pdds[i];
+
+   if (pdd)
+   return true;
+   }
+
+   return false;
+}
+
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
 {
-- 
2.17.1



[Patch v4 13/24] drm/amdkfd: CRIU checkpoint and restore queue mqds

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

Checkpoint contents of queue MQD's on CRIU dump and restore them during
CRIU restore.

Signed-off-by: David Yat Sin 

---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |   2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  72 +++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  14 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |   7 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  67 
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  68 
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  68 
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  69 
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 158 --
 11 files changed, 506 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 3fb155f756fd..146879cd3f2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -312,7 +312,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
p->pasid,
dev->id);
 
-   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL,
+   err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL, NULL,
&doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 0c50e67e2b51..3a5303ebcabf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
 
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-   &properties, &qid, NULL, NULL);
+   &properties, &qid, NULL, NULL, NULL);
 
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a0f5b8533a03..a92274f9f1f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -331,7 +331,8 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd,
-   const struct kfd_criu_queue_priv_data *qd)
+   const struct kfd_criu_queue_priv_data *qd,
+   const void *restore_mqd)
 {
struct mqd_manager *mqd_mgr;
int retval;
@@ -390,8 +391,14 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
-   mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
-   &q->gart_mqd_addr, &q->properties);
+
+   if (qd)
+   mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 
&q->gart_mqd_addr,
+&q->properties, restore_mqd);
+   else
+   mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+   &q->gart_mqd_addr, &q->properties);
+
if (q->properties.is_active) {
if (!dqm->sched_running) {
WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
@@ -1339,7 +1346,8 @@ static void destroy_kernel_queue_cpsch(struct 
device_queue_manager *dqm,
 
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue 
*q,
struct qcm_process_device *qpd,
-   const struct kfd_criu_queue_priv_data *qd)
+   const struct kfd_criu_queue_priv_data *qd,
+   const void *restore_mqd)
 {
int retval;
struct mqd_manager *mqd_mgr;
@@ -1385,8 +1393,12 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
 * updates the is_evicted flag but is a no-op otherwise.
 */
q->properties.is_evicted = !!qpd->evicted;
-   mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
-   &q->gart_mqd_addr, &q->properties);
+   if (qd)
+   mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 
&q->gart_mqd_addr,
+&q->properties, restore_mqd);
+   else
+   mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+   &q->gart_mqd_addr, &q->properties);
 
list_add(&q->list, &qpd->que

[Patch v4 05/24] drm/amdkfd: CRIU Implement KFD checkpoint ioctl

2021-12-22 Thread Rajneesh Bhardwaj
This adds support to discover the buffer objects that belong to a
process being checkpointed. The data corresponding to these buffer
objects is returned to the user space plugin running under the CRIU
master context, which then stores this info to recreate these buffer
objects during a restore operation.

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  20 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 172 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +-
 4 files changed, 195 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 56c5c4464829..4fd36bd9dcfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1173,6 +1173,26 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device 
*bdev,
return ttm_pool_free(&adev->mman.bdev.pool, ttm);
 }
 
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
+ * task
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr:  The returned value
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+   struct amdgpu_ttm_tt *gtt;
+
+   if (!tbo->ttm)
+   return -EINVAL;
+
+   gtt = (void *)tbo->ttm;
+   *user_addr = gtt->userptr;
+   return 0;
+}
+
 /**
  * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
  * task
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7346ecff4438..6e6d67ec43f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -177,6 +177,8 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct 
ttm_tt *ttm)
 #endif
 
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
 int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
  uint64_t addr, uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 53d7a20e3c06..cdbb92972338 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -42,6 +42,7 @@
 #include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
+#include "amdgpu_object.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1857,6 +1858,29 @@ static int kfd_ioctl_svm(struct file *filep, struct 
kfd_process *p, void *data)
 }
 #endif
 
+static int criu_checkpoint_process(struct kfd_process *p,
+uint8_t __user *user_priv_data,
+uint64_t *priv_offset)
+{
+   struct kfd_criu_process_priv_data process_priv;
+   int ret;
+
+   memset(&process_priv, 0, sizeof(process_priv));
+
+   process_priv.version = KFD_CRIU_PRIV_VERSION;
+
+   ret = copy_to_user(user_priv_data + *priv_offset,
+   &process_priv, sizeof(process_priv));
+
+   if (ret) {
+   pr_err("Failed to copy process information to user\n");
+   ret = -EFAULT;
+   }
+
+   *priv_offset += sizeof(process_priv);
+   return ret;
+}
+
 uint64_t get_process_num_bos(struct kfd_process *p)
 {
uint64_t num_of_bos = 0, i;
@@ -1877,6 +1901,111 @@ uint64_t get_process_num_bos(struct kfd_process *p)
return num_of_bos;
 }
 
+static int criu_checkpoint_bos(struct kfd_process *p,
+  uint32_t num_bos,
+  uint8_t __user *user_bos,
+  uint8_t __user *user_priv_data,
+  uint64_t *priv_offset)
+{
+   struct kfd_criu_bo_bucket *bo_buckets;
+   struct kfd_criu_bo_priv_data *bo_privs;
+   int ret = 0, pdd_index, bo_index = 0, id;
+   void *mem;
+
+   bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+   if (!bo_buckets) {
+   ret = -ENOMEM;
+   goto exit;
+   }
+
+   bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+   if (!bo_privs) {
+   ret = -ENOMEM;
+   goto exit;
+   }
+
+   for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+   struct kfd_process_device *pdd = p->pdds[pdd_index];
+   struct amdgpu_bo *dumper_bo;
+   struct kgd_mem *kgd_mem;
+
+   idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+   struct kfd_criu_bo_bucket *bo_bucket;
+   struct kfd_criu_bo_priv_data

[Patch v4 16/24] drm/amdkfd: CRIU implement gpu_id remapping

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

When doing a restore on a different node, the gpu_ids on the restore
node may be different. But the user space application will still use
the original gpu_ids in the ioctl calls. Add code to create a gpu_id
mapping so that KFD can determine the actual gpu_id during the user
ioctls.

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 465 --
 drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  45 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  11 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  32 ++
 .../amd/amdkfd/kfd_process_queue_manager.c|  18 +-
 5 files changed, 412 insertions(+), 159 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 08467fa2f514..20652d488cde 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -294,18 +294,20 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
return err;
 
pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
-   dev = kfd_device_by_id(args->gpu_id);
-   if (!dev) {
-   pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
-   return -EINVAL;
-   }
 
mutex_lock(&p->mutex);
+   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+   if (!pdd) {
+   pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+   err = -EINVAL;
+   goto err_unlock;
+   }
+   dev = pdd->dev;
 
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
-   goto err_bind_process;
+   goto err_unlock;
}
 
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
@@ -315,7 +317,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 
NULL, NULL, NULL,
&doorbell_offset_in_process);
if (err != 0)
-   goto err_create_queue;
+   goto err_unlock;
 
args->queue_id = queue_id;
 
@@ -344,8 +346,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
 
return 0;
 
-err_create_queue:
-err_bind_process:
+err_unlock:
mutex_unlock(&p->mutex);
return err;
 }
@@ -492,7 +493,6 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
 {
struct kfd_ioctl_set_memory_policy_args *args = data;
-   struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
enum cache_policy default_policy, alternate_policy;
@@ -507,13 +507,15 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
return -EINVAL;
}
 
-   dev = kfd_device_by_id(args->gpu_id);
-   if (!dev)
-   return -EINVAL;
-
mutex_lock(&p->mutex);
+   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+   if (!pdd) {
+   pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+   err = -EINVAL;
+   goto out;
+   }
 
-   pdd = kfd_bind_process_to_device(dev, p);
+   pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -526,7 +528,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
   ? cache_policy_coherent : cache_policy_noncoherent;
 
-   if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
+   if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
@@ -544,17 +546,18 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process *p, void *data)
 {
struct kfd_ioctl_set_trap_handler_args *args = data;
-   struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
 
-   dev = kfd_device_by_id(args->gpu_id);
-   if (!dev)
-   return -EINVAL;
-
mutex_lock(&p->mutex);
 
-   pdd = kfd_bind_process_to_device(dev, p);
+   pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+   if (!pdd) {
+   err = -EINVAL;
+   goto out;
+   }
+
+   pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -578,16 +581,20 @@ static int kfd_ioctl_dbg_register(struct file *filep,
bool create_ok;
long status = 0;
 
-   dev = kfd_device_by_id(args->gpu_id);
-   if (!dev)
-   

[Patch v4 06/24] drm/amdkfd: CRIU Implement KFD restore ioctl

2021-12-22 Thread Rajneesh Bhardwaj
This implements the KFD CRIU Restore ioctl that lays the basic
foundation for the CRIU restore operation. It provides support to
create the buffer objects corresponding to Non-Paged system memory
mapped for GPU and/or CPU access and lays basic foundation for the
userptrs buffer objects which will be added in a separate patch.
This ioctl creates various types of buffer objects such as VRAM,
MMIO, Doorbell, GTT based on the data sent from the userspace plugin.
The data mostly contains the previously checkpointed KFD images from
some KFD process.

While restoring a criu process, attach old IDR values to newly
created BOs. This also adds the minimal gpu mapping support for a single
gpu checkpoint restore use case.

Signed-off-by: David Yat Sin 
Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 298 ++-
 1 file changed, 297 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index cdbb92972338..c93f74ad073f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2069,11 +2069,307 @@ static int criu_checkpoint(struct file *filep,
return ret;
 }
 
+static int criu_restore_process(struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args,
+   uint64_t *priv_offset,
+   uint64_t max_priv_data_size)
+{
+   int ret = 0;
+   struct kfd_criu_process_priv_data process_priv;
+
+   if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+   return -EINVAL;
+
+   ret = copy_from_user(&process_priv,
+   (void __user *)(args->priv_data + *priv_offset),
+   sizeof(process_priv));
+   if (ret) {
+   pr_err("Failed to copy process private information from 
user\n");
+   ret = -EFAULT;
+   goto exit;
+   }
+   *priv_offset += sizeof(process_priv);
+
+   if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+   pr_err("Invalid CRIU API version (checkpointed:%d 
current:%d)\n",
+   process_priv.version, KFD_CRIU_PRIV_VERSION);
+   return -EINVAL;
+   }
+
+exit:
+   return ret;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+   struct kfd_ioctl_criu_args *args,
+   uint64_t *priv_offset,
+   uint64_t max_priv_data_size)
+{
+   struct kfd_criu_bo_bucket *bo_buckets;
+   struct kfd_criu_bo_priv_data *bo_privs;
+   bool flush_tlbs = false;
+   int ret = 0, j = 0;
+   uint32_t i;
+
+   if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > 
max_priv_data_size)
+   return -EINVAL;
+
+   bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), 
GFP_KERNEL);
+   if (!bo_buckets)
+   return -ENOMEM;
+
+   ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+args->num_bos * sizeof(*bo_buckets));
+   if (ret) {
+   pr_err("Failed to copy BOs information from user\n");
+   ret = -EFAULT;
+   goto exit;
+   }
+
+   bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+   if (!bo_privs) {
+   ret = -ENOMEM;
+   goto exit;
+   }
+
+   ret = copy_from_user(bo_privs, (void __user *)args->priv_data + 
*priv_offset,
+args->num_bos * sizeof(*bo_privs));
+   if (ret) {
+   pr_err("Failed to copy BOs information from user\n");
+   ret = -EFAULT;
+   goto exit;
+   }
+   *priv_offset += args->num_bos * sizeof(*bo_privs);
+
+   /* Create and map new BOs */
+   for (i = 0; i < args->num_bos; i++) {
+   struct kfd_criu_bo_bucket *bo_bucket;
+   struct kfd_criu_bo_priv_data *bo_priv;
+   struct kfd_dev *dev;
+   struct kfd_process_device *pdd;
+   void *mem;
+   u64 offset;
+   int idr_handle;
+
+   bo_bucket = &bo_buckets[i];
+   bo_priv = &bo_privs[i];
+
+   dev = kfd_device_by_id(bo_bucket->gpu_id);
+   if (!dev) {
+   ret = -EINVAL;
+   pr_err("Failed to get pdd\n");
+   goto exit;
+   }
+   pdd = kfd_get_process_device_data(dev, p);
+   if (!pdd) {
+   ret = -EINVAL;
+   pr_err("Failed to get pdd\n");
+   goto exit;
+   }
+
+   pr_debug("kfd restore ioctl - bo_bucket[%d]:\n", i);
+   pr_debug("size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+   "gpu_id = 0x%x alloc_flags = 0x%x\n"
+ 

[Patch v4 08/24] drm/amdkfd: CRIU Implement KFD unpause operation

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

Introducing UNPAUSE op. After CRIU amdgpu plugin performs a PROCESS_INFO
op the queues will stay in an evicted state. Once the plugin is done
draining BO contents, it is safe to perform an UNPAUSE op for the queues
to resume.

Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  1 +
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 87b9f019e96e..db2bb302a8d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2040,6 +2040,14 @@ static int criu_checkpoint(struct file *filep,
goto exit_unlock;
}
 
+   /* Confirm all process queues are evicted */
+   if (!p->queues_paused) {
+   pr_err("Cannot dump process when queues are not in evicted 
state\n");
+   /* CRIU plugin did not call op PROCESS_INFO before 
checkpointing */
+   ret = -EINVAL;
+   goto exit_unlock;
+   }
+
criu_get_process_object_info(p, &num_bos, &priv_size);
 
if (num_bos != args->num_bos ||
@@ -2382,7 +2390,24 @@ static int criu_unpause(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
 {
-   return 0;
+   int ret;
+
+   mutex_lock(&p->mutex);
+
+   if (!p->queues_paused) {
+   mutex_unlock(&p->mutex);
+   return -EINVAL;
+   }
+
+   ret = kfd_process_restore_queues(p);
+   if (ret)
+   pr_err("Failed to unpause queues ret:%d\n", ret);
+   else
+   p->queues_paused = false;
+
+   mutex_unlock(&p->mutex);
+
+   return ret;
 }
 
 static int criu_resume(struct file *filep,
@@ -2434,6 +2459,12 @@ static int criu_process_info(struct file *filep,
goto err_unlock;
}
 
+   ret = kfd_process_evict_queues(p);
+   if (ret)
+   goto err_unlock;
+
+   p->queues_paused = true;
+
args->pid = task_pid_nr_ns(p->lead_thread,
task_active_pid_ns(p->lead_thread));
 
@@ -2441,6 +2472,10 @@ static int criu_process_info(struct file *filep,
 
dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos);
 err_unlock:
+   if (ret) {
+   kfd_process_restore_queues(p);
+   p->queues_paused = false;
+   }
mutex_unlock(&p->mutex);
return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index cd72541a8f4f..f3a9f3de34e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -875,6 +875,9 @@ struct kfd_process {
struct svm_range_list svms;
 
bool xnack_enabled;
+
+   /* Queues are in paused stated because we are in the process of doing a 
CRIU checkpoint */
+   bool queues_paused;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d2fcdc5e581f..e20fbb7ba9bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1364,6 +1364,7 @@ static struct kfd_process *create_process(const struct 
task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+   process->queues_paused = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
-- 
2.17.1



[Patch v4 17/24] drm/amdkfd: CRIU export BOs as prime dmabuf objects

2021-12-22 Thread Rajneesh Bhardwaj
KFD buffer objects do not associate a GEM handle with them so cannot
directly be used with libdrm to initiate a system dma (sDMA) operation
to speedup the checkpoint and restore operation so export them as dmabuf
objects and use with libdrm helper (amdgpu_bo_import) to further process
the sdma command submissions.

With sDMA, we see huge improvement in checkpoint and restore operations
compared to the generic pci based access via host data path.

Suggested-by: Felix Kuehling 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 71 +++-
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 20652d488cde..178b0ccfb286 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -43,6 +44,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 #include "amdgpu_object.h"
+#include "amdgpu_dma_buf.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1932,6 +1934,33 @@ uint64_t get_process_num_bos(struct kfd_process *p)
return num_of_bos;
 }
 
+static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+ u32 *shared_fd)
+{
+   struct dma_buf *dmabuf;
+   int ret;
+
+   dmabuf = amdgpu_gem_prime_export(gobj, flags);
+   if (IS_ERR(dmabuf)) {
+   ret = PTR_ERR(dmabuf);
+   pr_err("dmabuf export failed for the BO\n");
+   return ret;
+   }
+
+   ret = dma_buf_fd(dmabuf, flags);
+   if (ret < 0) {
+   pr_err("dmabuf create fd failed, ret:%d\n", ret);
+   goto out_free_dmabuf;
+   }
+
+   *shared_fd = ret;
+   return 0;
+
+out_free_dmabuf:
+   dma_buf_put(dmabuf);
+   return ret;
+}
+
 static int criu_checkpoint_bos(struct kfd_process *p,
   uint32_t num_bos,
   uint8_t __user *user_bos,
@@ -1992,6 +2021,14 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
}
+   if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   ret = 
criu_get_prime_handle(&dumper_bo->tbo.base,
+   bo_bucket->alloc_flags &
+   
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+   &bo_bucket->dmabuf_fd);
+   if (ret)
+   goto exit;
+   }
if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
KFD_MMAP_GPU_ID(pdd->dev->id);
@@ -2031,6 +2068,10 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
 
 exit:
+   while (ret && bo_index--) {
+   if (bo_buckets[bo_index].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   close_fd(bo_buckets[bo_index].dmabuf_fd);
+   }
 
kvfree(bo_buckets);
kvfree(bo_privs);
@@ -2131,16 +2172,28 @@ static int criu_checkpoint(struct file *filep,
ret = kfd_criu_checkpoint_queues(p, (uint8_t __user 
*)args->priv_data,
 &priv_offset);
if (ret)
-   goto exit_unlock;
+   goto close_bo_fds;
 
ret = kfd_criu_checkpoint_events(p, (uint8_t __user 
*)args->priv_data,
 &priv_offset);
if (ret)
-   goto exit_unlock;
+   goto close_bo_fds;
 
/* TODO: Dump SVM-Ranges */
}
 
+close_bo_fds:
+   if (ret) {
+   /* If IOCTL returns err, user assumes all FDs opened in 
criu_dump_bos are closed */
+   uint32_t i;
+   struct kfd_criu_bo_bucket *bo_buckets = (struct 
kfd_criu_bo_bucket *) args->bos;
+
+   for (i = 0; i < num_bos; i++) {
+   if (bo_buckets[i].alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+   close_fd(bo_buckets[i].dmabuf_fd);
+   }
+   }
+
 exit_unlock:
mutex_unlock(&p->mutex);
if (ret)
@@ -2335,6 +2388,7 @@ static int criu_restore_bos(struct kfd_process *p,
struct kfd_criu_bo_priv_data *bo_priv;
struct kfd_dev *dev;
  

[Patch v4 15/24] drm/amdkfd: CRIU checkpoint and restore events

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

Add support to existing CRIU ioctl's to save and restore events during
criu checkpoint and restore.

Signed-off-by: David Yat Sin 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  70 +-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 272 ---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  27 ++-
 3 files changed, 280 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 582b4a393f95..08467fa2f514 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1009,57 +1009,11 @@ static int kfd_ioctl_create_event(struct file *filp, 
struct kfd_process *p,
 * through the event_page_offset field.
 */
if (args->event_page_offset) {
-   struct kfd_dev *kfd;
-   struct kfd_process_device *pdd;
-   void *mem, *kern_addr;
-   uint64_t size;
-
-   kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
-   if (!kfd) {
-   pr_err("Getting device by id failed in %s\n", __func__);
-   return -EINVAL;
-   }
-
mutex_lock(&p->mutex);
-
-   if (p->signal_page) {
-   pr_err("Event page is already set\n");
-   err = -EINVAL;
-   goto out_unlock;
-   }
-
-   pdd = kfd_bind_process_to_device(kfd, p);
-   if (IS_ERR(pdd)) {
-   err = PTR_ERR(pdd);
-   goto out_unlock;
-   }
-
-   mem = kfd_process_device_translate_handle(pdd,
-   GET_IDR_HANDLE(args->event_page_offset));
-   if (!mem) {
-   pr_err("Can't find BO, offset is 0x%llx\n",
-  args->event_page_offset);
-   err = -EINVAL;
-   goto out_unlock;
-   }
-
-   err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
-   mem, &kern_addr, &size);
-   if (err) {
-   pr_err("Failed to map event page to kernel\n");
-   goto out_unlock;
-   }
-
-   err = kfd_event_page_set(p, kern_addr, size);
-   if (err) {
-   pr_err("Failed to set event page\n");
-   amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, 
mem);
-   goto out_unlock;
-   }
-
-   p->signal_handle = args->event_page_offset;
-
+   err = kfd_kmap_event_page(p, args->event_page_offset);
mutex_unlock(&p->mutex);
+   if (err)
+   return err;
}
 
err = kfd_event_create(filp, p, args->event_type,
@@ -1068,10 +1022,7 @@ static int kfd_ioctl_create_event(struct file *filp, 
struct kfd_process *p,
&args->event_page_offset,
&args->event_slot_index);
 
-   return err;
-
-out_unlock:
-   mutex_unlock(&p->mutex);
+   pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
return err;
 }
 
@@ -2022,7 +1973,7 @@ static int criu_get_process_object_info(struct 
kfd_process *p,
if (ret)
return ret;
 
-   num_events = 0; /* TODO: Implement Events */
+   num_events = kfd_get_num_events(p);
num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */
 
*num_objects = num_queues + num_events + num_svm_ranges;
@@ -2031,7 +1982,7 @@ static int criu_get_process_object_info(struct 
kfd_process *p,
priv_size = sizeof(struct kfd_criu_process_priv_data);
priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
priv_size += queues_priv_data_size;
-   /* TODO: Add Events priv size */
+   priv_size += num_events * sizeof(struct 
kfd_criu_event_priv_data);
/* TODO: Add SVM ranges priv size */
*objs_priv_size = priv_size;
}
@@ -2093,7 +2044,10 @@ static int criu_checkpoint(struct file *filep,
if (ret)
goto exit_unlock;
 
-   /* TODO: Dump Events */
+   ret = kfd_criu_checkpoint_events(p, (uint8_t __user 
*)args->priv_data,
+&priv_offset);
+   if (ret)
+   goto exit_unlock;
 
/* TODO: Dump SVM-Ranges */
}
@@ -2406,8 +2360,8 @@ static int criu_restore_objects(struct file *filep,
goto exit;
break;
case KFD_CRIU_OBJECT_TYPE_EVENT:
-   /* TODO: Implement Events */
-   *priv_offset += sizeof(s

[Patch v4 21/24] drm/amdkfd: CRIU Discover svm ranges

2021-12-22 Thread Rajneesh Bhardwaj
A KFD process may contain a number of virtual address ranges for shared
virtual memory management and each such range can have many SVM
attributes spanning across various nodes within the process boundary.
This change reports the total number of such SVM ranges and
their total private data size by extending the PROCESS_INFO op of the
CRIU IOCTL to discover the svm ranges in the target process. Future
patches bring in the required support for checkpoint and restore of
SVM ranges.


Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  5 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 11 +
 4 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 446eb9310915..1c25d5e9067c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2089,10 +2089,9 @@ static int criu_get_process_object_info(struct 
kfd_process *p,
uint32_t *num_objects,
uint64_t *objs_priv_size)
 {
-   int ret;
-   uint64_t priv_size;
+   uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
uint32_t num_queues, num_events, num_svm_ranges;
-   uint64_t queues_priv_data_size;
+   int ret;
 
*num_devices = p->n_pdds;
*num_bos = get_process_num_bos(p);
@@ -2102,7 +2101,10 @@ static int criu_get_process_object_info(struct 
kfd_process *p,
return ret;
 
num_events = kfd_get_num_events(p);
-   num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */
+
+   ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+   if (ret)
+   return ret;
 
*num_objects = num_queues + num_events + num_svm_ranges;
 
@@ -2112,7 +2114,7 @@ static int criu_get_process_object_info(struct 
kfd_process *p,
priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
priv_size += queues_priv_data_size;
priv_size += num_events * sizeof(struct 
kfd_criu_event_priv_data);
-   /* TODO: Add SVM ranges priv size */
+   priv_size += svm_priv_data_size;
*objs_priv_size = priv_size;
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d72dda84c18c..87eb6739a78e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1082,7 +1082,10 @@ enum kfd_criu_object_type {
 
 struct kfd_criu_svm_range_priv_data {
uint32_t object_type;
-   uint64_t reserved;
+   uint64_t start_addr;
+   uint64_t size;
+   /* Variable length array of attributes */
+   struct kfd_ioctl_svm_attribute attrs[0];
 };
 
 struct kfd_criu_queue_priv_data {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7c92116153fe..49e05fb5c898 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3418,6 +3418,66 @@ svm_range_get_attr(struct kfd_process *p, struct 
mm_struct *mm,
return 0;
 }
 
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+  uint64_t *svm_priv_data_size)
+{
+   uint64_t total_size, accessibility_size, common_attr_size;
+   int nattr_common = 4, naatr_accessibility = 1;
+   int num_devices = p->n_pdds;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   uint32_t count = 0;
+
+   *svm_priv_data_size = 0;
+
+   svms = &p->svms;
+   if (!svms)
+   return -EINVAL;
+
+   mutex_lock(&svms->lock);
+   list_for_each_entry(prange, &svms->list, list) {
+   pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 
0x%llx\n",
+prange, prange->start, prange->npages,
+prange->start + prange->npages - 1);
+   count++;
+   }
+   mutex_unlock(&svms->lock);
+
+   *num_svm_ranges = count;
+   /* Only the accessbility attributes need to be queried for all the gpus
+* individually, remaining ones are spanned across the entire process
+* regardless of the various gpu nodes. Of the remaining attributes,
+* KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+*
+* KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+* KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+* KFD_IOCTL_SVM_ATTR_SET_FLAGS
+* KFD_IOCTL_SVM_ATTR_GRANULARITY
+*
+* ** ACCESSBILITY ATTRIBUTES **
+* (Considered as one, type is altered during query, value is gpuid)
+* KFD_IOCTL_SVM_ATTR_ACCESS
+* KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+* KFD_IOCTL_SVM_ATTR_NO_ACCESS
+*/

[Patch v4 12/24] drm/amdkfd: CRIU restore queue doorbell id

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

When re-creating queues during CRIU restore, restore the queue with the
same doorbell id value used during CRIU dump.

Signed-off-by: David Yat Sin 

---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 60 +--
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7e49f70b81b9..a0f5b8533a03 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,7 +153,13 @@ static void decrement_queue_count(struct 
device_queue_manager *dqm,
dqm->active_cp_queue_count--;
 }
 
-static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+/*
+ * Allocate a doorbell ID to this queue.
+ * If doorbell_id is passed in, make sure requested ID is valid then allocate 
it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+struct queue *q,
+uint32_t const *restore_id)
 {
struct kfd_dev *dev = qpd->dqm->dev;
 
@@ -161,6 +167,10 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
/* On pre-SOC15 chips we need to use the queue ID to
 * preserve the user mode ABI.
 */
+
+   if (restore_id && *restore_id != q->properties.queue_id)
+   return -EINVAL;
+
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
@@ -169,25 +179,37 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
 * The doobell index distance between RLC (2*i) and (2*i+1)
 * for a SDMA engine is 512.
 */
-   uint32_t *idx_offset =
-   dev->shared_resources.sdma_doorbell_idx;
 
-   q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
-   + (q->properties.sdma_queue_id & 1)
-   * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
-   + (q->properties.sdma_queue_id >> 1);
+   uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
+   uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+   + (q->properties.sdma_queue_id 
& 1)
+   * 
KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+   + (q->properties.sdma_queue_id 
>> 1);
+
+   if (restore_id && *restore_id != valid_id)
+   return -EINVAL;
+   q->doorbell_id = valid_id;
} else {
-   /* For CP queues on SOC15 reserve a free doorbell ID */
-   unsigned int found;
-
-   found = find_first_zero_bit(qpd->doorbell_bitmap,
-   KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
-   if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
-   pr_debug("No doorbells available");
-   return -EBUSY;
+   /* For CP queues on SOC15 */
+   if (restore_id) {
+   /* make sure that ID is free  */
+   if (__test_and_set_bit(*restore_id, 
qpd->doorbell_bitmap))
+   return -EINVAL;
+
+   q->doorbell_id = *restore_id;
+   } else {
+   /* or reserve a free doorbell ID */
+   unsigned int found;
+
+   found = find_first_zero_bit(qpd->doorbell_bitmap,
+   
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+   if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+   pr_debug("No doorbells available");
+   return -EBUSY;
+   }
+   set_bit(found, qpd->doorbell_bitmap);
+   q->doorbell_id = found;
}
-   set_bit(found, qpd->doorbell_bitmap);
-   q->doorbell_id = found;
}
 
q->properties.doorbell_off =
@@ -355,7 +377,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
}
 
-   retval = allocate_doorbell(qpd, q);
+   retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_hqd;
 
@@ -1338,7 +1360,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
goto out;
}
 
-   retval = allocate_doorbell(qpd, q);
+   retval = allocate_doorbell(qpd, q, qd ? &qd->doorbel

[Patch v4 19/24] drm/amdkfd: CRIU allow external mm for svm ranges

2021-12-22 Thread Rajneesh Bhardwaj
Both svm_range_get_attr and svm_range_set_attr helpers use mm struct
from current but for a Checkpoint or Restore operation, the current->mm
will fetch the mm for the CRIU master process. So modify these helpers to
accept the task mm for a target kfd process to support Checkpoint
Restore.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 88360f23eb61..7c92116153fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3134,11 +3134,11 @@ static void svm_range_evict_svm_bo_worker(struct 
work_struct *work)
 }
 
 static int
-svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
-  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
+  uint64_t start, uint64_t size, uint32_t nattr,
+  struct kfd_ioctl_svm_attribute *attrs)
 {
struct amdkfd_process_info *process_info = p->kgd_process_info;
-   struct mm_struct *mm = current->mm;
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
@@ -3242,8 +3242,9 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
 }
 
 static int
-svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
-  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
+  uint64_t start, uint64_t size, uint32_t nattr,
+  struct kfd_ioctl_svm_attribute *attrs)
 {
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
@@ -3253,7 +3254,6 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
bool get_accessible = false;
bool get_flags = false;
uint64_t last = start + size - 1UL;
-   struct mm_struct *mm = current->mm;
uint8_t granularity = 0xff;
struct interval_tree_node *node;
struct svm_range_list *svms;
@@ -3422,6 +3422,7 @@ int
 svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
  uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
 {
+   struct mm_struct *mm = current->mm;
int r;
 
start >>= PAGE_SHIFT;
@@ -3429,10 +3430,10 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op 
op, uint64_t start,
 
switch (op) {
case KFD_IOCTL_SVM_OP_SET_ATTR:
-   r = svm_range_set_attr(p, start, size, nattrs, attrs);
+   r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
break;
case KFD_IOCTL_SVM_OP_GET_ATTR:
-   r = svm_range_get_attr(p, start, size, nattrs, attrs);
+   r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
r = EINVAL;
-- 
2.17.1



[Patch v4 18/24] drm/amdkfd: CRIU checkpoint and restore xnack mode

2021-12-22 Thread Rajneesh Bhardwaj
Recoverable page faults are represented by the xnack mode setting inside
a kfd process and are used to represent the device page faults. For CR,
we don't consider negative values which are typically used for querying
the current xnack mode without modifying it.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  1 +
 2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 178b0ccfb286..446eb9310915 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1845,6 +1845,11 @@ static int criu_checkpoint_process(struct kfd_process *p,
memset(&process_priv, 0, sizeof(process_priv));
 
process_priv.version = KFD_CRIU_PRIV_VERSION;
+   /* For CR, we don't consider negative xnack mode which is used for
+* querying without changing it, here 0 simply means disabled and 1
+* means enabled so retry for finding a valid PTE.
+*/
+   process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
 
ret = copy_to_user(user_priv_data + *priv_offset,
&process_priv, sizeof(process_priv));
@@ -2231,6 +2236,16 @@ static int criu_restore_process(struct kfd_process *p,
return -EINVAL;
}
 
+   pr_debug("Setting XNACK mode\n");
+   if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+   pr_err("xnack mode cannot be set\n");
+   ret = -EPERM;
+   goto exit;
+   } else {
+   pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
+   p->xnack_enabled = process_priv.xnack_mode;
+   }
+
 exit:
return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 855c162b85ea..d72dda84c18c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1057,6 +1057,7 @@ void kfd_process_set_trap_handler(struct 
qcm_process_device *qpd,
 
 struct kfd_criu_process_priv_data {
uint32_t version;
+   uint32_t xnack_mode;
 };
 
 struct kfd_criu_device_priv_data {
-- 
2.17.1



[Patch v4 22/24] drm/amdkfd: CRIU Save Shared Virtual Memory ranges

2021-12-22 Thread Rajneesh Bhardwaj
During checkpoint stage, save the shared virtual memory ranges and
attributes for the target process. A process may contain a number of svm
ranges, and each range might contain a number of attributes. While not
all attributes may be applicable for a given prange, during
checkpoint we store all possible values for the max possible attribute
types.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 95 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 10 +++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1c25d5e9067c..916b8d000317 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2186,7 +2186,9 @@ static int criu_checkpoint(struct file *filep,
if (ret)
goto close_bo_fds;
 
-   /* TODO: Dump SVM-Ranges */
+   ret = kfd_criu_checkpoint_svm(p, (uint8_t __user 
*)args->priv_data, &priv_offset);
+   if (ret)
+   goto close_bo_fds;
}
 
 close_bo_fds:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 49e05fb5c898..6d59f1bedcf2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3478,6 +3478,101 @@ int svm_range_get_info(struct kfd_process *p, uint32_t 
*num_svm_ranges,
return 0;
 }
 
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+   uint8_t __user *user_priv_data,
+   uint64_t *priv_data_offset)
+{
+   struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+   struct kfd_ioctl_svm_attribute *query_attr = NULL;
+   uint64_t svm_priv_data_size, query_attr_size = 0;
+   int index, nattr_common = 4, ret = 0;
+   struct svm_range_list *svms;
+   int num_devices = p->n_pdds;
+   struct svm_range *prange;
+   struct mm_struct *mm;
+
+   svms = &p->svms;
+   if (!svms)
+   return -EINVAL;
+
+   mm = get_task_mm(p->lead_thread);
+   if (!mm) {
+   pr_err("failed to get mm for the target process\n");
+   return -ESRCH;
+   }
+
+   query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+   (nattr_common + num_devices);
+
+   query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+   if (!query_attr) {
+   ret = -ENOMEM;
+   goto exit;
+   }
+
+   query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+   query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+   query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+   query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+   for (index = 0; index < num_devices; index++) {
+   struct kfd_process_device *pdd = p->pdds[index];
+
+   query_attr[index + nattr_common].type =
+   KFD_IOCTL_SVM_ATTR_ACCESS;
+   query_attr[index + nattr_common].value = pdd->user_gpu_id;
+   }
+
+   svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+   svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+   if (!svm_priv) {
+   ret = -ENOMEM;
+   goto exit_query;
+   }
+
+   index = 0;
+   list_for_each_entry(prange, &svms->list, list) {
+
+   svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+   svm_priv->start_addr = prange->start;
+   svm_priv->size = prange->npages;
+   memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+   pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 
0x%llx\t size: 0x%llx\n",
+prange, prange->start, prange->npages,
+prange->start + prange->npages - 1,
+prange->npages * PAGE_SIZE);
+
+   ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+svm_priv->size,
+(nattr_common + num_devices),
+svm_priv->attrs);
+   if (ret) {
+   pr_err("CRIU: failed to obtain range attributes\n");
+   goto exit_priv;
+   }
+
+   ret = copy_to_user(user_priv_data + *priv_data_offset,
+  svm_priv, svm_priv_data_size);
+   if (ret) {
+   pr_err("Failed to copy svm priv to user\n");
+   goto exit_priv;
+   }
+
+   *priv_data_offset += svm_priv_data_size;
+
+   }
+
+
+exit_priv:
+   kfree(svm_priv);
+exit_query:
+   kfree(query_attr);
+exit:
+   mmput(mm);
+   return ret;
+}
+
 int
 svm_ioctl(struct kfd_process *p, enum 

[Patch v4 11/24] drm/amdkfd: CRIU restore sdma id for queues

2021-12-22 Thread Rajneesh Bhardwaj
From: David Yat Sin 

When re-creating queues during CRIU restore, restore the queue with the
same sdma id value used during CRIU dump.

Signed-off-by: David Yat Sin 

---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 48 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  4 +-
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 62fe28244a80..7e49f70b81b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -58,7 +58,7 @@ static inline void deallocate_hqd(struct device_queue_manager 
*dqm,
struct queue *q);
 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
 static int allocate_sdma_queue(struct device_queue_manager *dqm,
-   struct queue *q);
+   struct queue *q, const uint32_t 
*restore_sdma_id);
 static void kfd_process_hw_exception(struct work_struct *work);
 
 static inline
@@ -308,7 +308,8 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd)
+   struct qcm_process_device *qpd,
+   const struct kfd_criu_queue_priv_data *qd)
 {
struct mqd_manager *mqd_mgr;
int retval;
@@ -348,7 +349,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
q->pipe, q->queue);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   retval = allocate_sdma_queue(dqm, q);
+   retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
if (retval)
goto deallocate_vmid;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
@@ -1040,7 +1041,7 @@ static void pre_reset(struct device_queue_manager *dqm)
 }
 
 static int allocate_sdma_queue(struct device_queue_manager *dqm,
-   struct queue *q)
+   struct queue *q, const uint32_t 
*restore_sdma_id)
 {
int bit;
 
@@ -1050,9 +1051,21 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
return -ENOMEM;
}
 
-   bit = __ffs64(dqm->sdma_bitmap);
-   dqm->sdma_bitmap &= ~(1ULL << bit);
-   q->sdma_id = bit;
+   if (restore_sdma_id) {
+   /* Re-use existing sdma_id */
+   if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
+   pr_err("SDMA queue already in use\n");
+   return -EBUSY;
+   }
+   dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+   q->sdma_id = *restore_sdma_id;
+   } else {
+   /* Find first available sdma_id */
+   bit = __ffs64(dqm->sdma_bitmap);
+   dqm->sdma_bitmap &= ~(1ULL << bit);
+   q->sdma_id = bit;
+   }
+
q->properties.sdma_engine_id = q->sdma_id %
get_num_sdma_engines(dqm);
q->properties.sdma_queue_id = q->sdma_id /
@@ -1062,9 +1075,19 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
}
-   bit = __ffs64(dqm->xgmi_sdma_bitmap);
-   dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
-   q->sdma_id = bit;
+   if (restore_sdma_id) {
+   /* Re-use existing sdma_id */
+   if (!(dqm->xgmi_sdma_bitmap & (1ULL << 
*restore_sdma_id))) {
+   pr_err("SDMA queue already in use\n");
+   return -EBUSY;
+   }
+   dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+   q->sdma_id = *restore_sdma_id;
+   } else {
+   bit = __ffs64(dqm->xgmi_sdma_bitmap);
+   dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+   q->sdma_id = bit;
+   }
/* sdma_engine_id is sdma id including
 * both PCIe-optimized SDMAs and XGMI-
 * optimized SDMAs. The calculation below
@@ -1293,7 +1316,8 @@ static void destroy_kernel_queue_cpsch(struct 
device_queue_manager *dqm,
 }
 
 static int create_queue_cpsch(struct device_qu

[Patch v4 20/24] drm/amdkfd: use user_gpu_id for svm ranges

2021-12-22 Thread Rajneesh Bhardwaj
Currently the SVM ranges use actual_gpu_id but with Checkpoint Restore
support it's possible that the SVM ranges can be resumed on another node
where the actual_gpu_id may not be the same as the original
(user_gpu_id) gpu id. So modify svm code to use user_gpu_id.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 67e2432098d1..0769dc655e15 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1813,7 +1813,7 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, 
uint32_t gpu_id)
int i;
 
for (i = 0; i < p->n_pdds; i++)
-   if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+   if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
return i;
return -EINVAL;
 }
@@ -1826,7 +1826,7 @@ kfd_process_gpuid_from_adev(struct kfd_process *p, struct 
amdgpu_device *adev,
 
for (i = 0; i < p->n_pdds; i++)
if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {
-   *gpuid = p->pdds[i]->dev->id;
+   *gpuid = p->pdds[i]->user_gpu_id;
*gpuidx = i;
return 0;
}
-- 
2.17.1



[Patch v4 23/24] drm/amdkfd: CRIU prepare for svm resume

2021-12-22 Thread Rajneesh Bhardwaj
During CRIU restore phase, the VMAs for the virtual address ranges are
not at their final location yet, so in this stage only cache the data
required to successfully resume the svm ranges during an imminent CRIU
resume phase.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  5 ++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 99 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 12 +++
 4 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 916b8d000317..f7aa15b18f95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct file *filep,
goto exit;
break;
case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
-   /* TODO: Implement SVM range */
-   *priv_offset += sizeof(struct 
kfd_criu_svm_range_priv_data);
+   ret = kfd_criu_restore_svm(p, (uint8_t __user 
*)args->priv_data,
+priv_offset, 
max_priv_data_size);
if (ret)
goto exit;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 87eb6739a78e..92191c541c29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -790,6 +790,7 @@ struct svm_range_list {
struct list_headlist;
struct work_struct  deferred_list_work;
struct list_headdeferred_range_list;
+   struct list_headcriu_svm_metadata_list;
spinlock_t  deferred_list_lock;
atomic_tevicted_ranges;
booldrain_pagefaults;
@@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file *devkfd,
   uint8_t __user *user_priv_data,
   uint64_t *priv_data_offset,
   uint64_t max_priv_data_size);
+int kfd_criu_restore_svm(struct kfd_process *p,
+uint8_t __user *user_priv_data,
+uint64_t *priv_data_offset,
+uint64_t max_priv_data_size);
 /* CRIU - End */
 
 /* Queue Context Management */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 6d59f1bedcf2..e9f6c63c2a26 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -45,6 +45,14 @@
  */
 #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
 
+struct criu_svm_metadata {
+   struct list_head list;
+   __u64 start_addr;
+   __u64 size;
+   /* Variable length array of attributes */
+   struct kfd_ioctl_svm_attribute attrs[0];
+};
+
 static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -2753,6 +2761,7 @@ int svm_range_list_init(struct kfd_process *p)
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
+   INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
spin_lock_init(&svms->deferred_list_lock);
 
for (i = 0; i < p->n_pdds; i++)
@@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process *p, struct 
mm_struct *mm,
return 0;
 }
 
+int svm_criu_prepare_for_resume(struct kfd_process *p,
+   struct kfd_criu_svm_range_priv_data *svm_priv)
+{
+   int nattr_common = 4, nattr_accessibility = 1;
+   struct criu_svm_metadata *criu_svm_md = NULL;
+   uint64_t svm_attrs_size, svm_object_md_size;
+   struct svm_range_list *svms = &p->svms;
+   int num_devices = p->n_pdds;
+   int i, ret = 0;
+
+   svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
+   (nattr_common + nattr_accessibility * num_devices);
+   svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
+
+   criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
+   if (!criu_svm_md) {
+   pr_err("failed to allocate memory to store svm metadata\n");
+   ret = -ENOMEM;
+   goto exit;
+   }
+
+   criu_svm_md->start_addr = svm_priv->start_addr;
+   criu_svm_md->size = svm_priv->size;
+   for (i = 0; i < svm_attrs_size; i++)
+   {
+   criu_svm_md->attrs[i].type = svm_priv->attrs[i].type;
+   criu_svm_md->attrs[i].value = svm_priv->attrs[i].value;
+   }
+
+   list_add_tail(&criu_svm_md->list, &svms->criu_svm_m

[Patch v4 24/24] drm/amdkfd: CRIU resume shared virtual memory ranges

2021-12-22 Thread Rajneesh Bhardwaj
In CRIU resume stage, resume all the shared virtual memory ranges from
the data stored inside the resuming kfd process during CRIU restore
phase. Also setup xnack mode and free up the resources.

Signed-off-by: Rajneesh Bhardwaj 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 55 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  6 +++
 3 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f7aa15b18f95..6191e37656dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2759,7 +2759,17 @@ static int criu_resume(struct file *filep,
}
 
mutex_lock(&target->mutex);
+   ret = kfd_criu_resume_svm(target);
+   if (ret) {
+   pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+   goto exit;
+   }
+
ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+   if (ret)
+   pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+
+exit:
mutex_unlock(&target->mutex);
 
kfd_unref_process(target);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index e9f6c63c2a26..bd2dce37f345 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3427,6 +3427,61 @@ svm_range_get_attr(struct kfd_process *p, struct 
mm_struct *mm,
return 0;
 }
 
+int kfd_criu_resume_svm(struct kfd_process *p)
+{
+   int nattr_common = 4, nattr_accessibility = 1;
+   struct criu_svm_metadata *criu_svm_md = NULL;
+   struct criu_svm_metadata *next = NULL;
+   struct svm_range_list *svms = &p->svms;
+   int i, j, num_attrs, ret = 0;
+   struct mm_struct *mm;
+
+   if (list_empty(&svms->criu_svm_metadata_list)) {
+   pr_debug("No SVM data from CRIU restore stage 2\n");
+   return ret;
+   }
+
+   mm = get_task_mm(p->lead_thread);
+   if (!mm) {
+   pr_err("failed to get mm for the target process\n");
+   return -ESRCH;
+   }
+
+   num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
+
+   i = j = 0;
+   list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
+   pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx 
(npages)\n",
+i, criu_svm_md->start_addr, criu_svm_md->size);
+   for (j = 0; j < num_attrs; j++) {
+   pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x 
\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
+i,j, criu_svm_md->attrs[j].type,
+i,j, criu_svm_md->attrs[j].value);
+   }
+
+   ret = svm_range_set_attr(p, mm, criu_svm_md->start_addr,
+criu_svm_md->size, num_attrs,
+criu_svm_md->attrs);
+   if (ret) {
+   pr_err("CRIU: failed to set range attributes\n");
+   goto exit;
+   }
+
+   i++;
+   }
+
+exit:
+   list_for_each_entry_safe(criu_svm_md, next, 
&svms->criu_svm_metadata_list, list) {
+   pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
+   criu_svm_md->start_addr);
+   kfree(criu_svm_md);
+   }
+
+   mmput(mm);
+   return ret;
+
+}
+
 int svm_criu_prepare_for_resume(struct kfd_process *p,
struct kfd_criu_svm_range_priv_data *svm_priv)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index e0c0853f085c..3b5bcb52723c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -195,6 +195,7 @@ int kfd_criu_restore_svm(struct kfd_process *p,
 uint8_t __user *user_priv_ptr,
 uint64_t *priv_data_offset,
 uint64_t max_priv_data_size);
+int kfd_criu_resume_svm(struct kfd_process *p);
 struct kfd_process_device *
 svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device 
*adev);
 void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct 
mm_struct *mm);
@@ -256,6 +257,11 @@ static inline int kfd_criu_restore_svm(struct kfd_process 
*p,
return -EINVAL;
 }
 
+static inline int kfd_criu_resume_svm(struct kfd_process *p)
+{
+   return 0;
+}
+
 #define KFD_IS_SVM_API_SUPPORTED(dev) false
 
 #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
-- 
2.17.1



[Patch v4 02/24] x86/configs: Add rock-rel_defconfig for amd-feature-criu branch

2021-12-22 Thread Rajneesh Bhardwaj
 - Add rock-rel_defconfig for release builds.

Signed-off-by: Rajneesh Bhardwaj 
---
 arch/x86/configs/rock-rel_defconfig | 4927 +++
 1 file changed, 4927 insertions(+)
 create mode 100644 arch/x86/configs/rock-rel_defconfig

diff --git a/arch/x86/configs/rock-rel_defconfig 
b/arch/x86/configs/rock-rel_defconfig
new file mode 100644
index ..f038ce7a0d06
--- /dev/null
+++ b/arch/x86/configs/rock-rel_defconfig
@@ -0,0 +1,4927 @@
+#
+# Automatically generated file; DO NOT EDIT.
+# Linux/x86 5.13.0 Kernel Configuration
+#
+CONFIG_CC_VERSION_TEXT="gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
+CONFIG_CC_IS_GCC=y
+CONFIG_GCC_VERSION=70500
+CONFIG_CLANG_VERSION=0
+CONFIG_AS_IS_GNU=y
+CONFIG_AS_VERSION=23000
+CONFIG_LD_IS_BFD=y
+CONFIG_LD_VERSION=23000
+CONFIG_LLD_VERSION=0
+CONFIG_CC_CAN_LINK=y
+CONFIG_CC_CAN_LINK_STATIC=y
+CONFIG_CC_HAS_ASM_GOTO=y
+CONFIG_CC_HAS_ASM_INLINE=y
+CONFIG_IRQ_WORK=y
+CONFIG_BUILDTIME_TABLE_SORT=y
+CONFIG_THREAD_INFO_IN_TASK=y
+
+#
+# General setup
+#
+CONFIG_INIT_ENV_ARG_LIMIT=32
+# CONFIG_COMPILE_TEST is not set
+CONFIG_LOCALVERSION="-kfd"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_BUILD_SALT=""
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_HAVE_KERNEL_XZ=y
+CONFIG_HAVE_KERNEL_LZO=y
+CONFIG_HAVE_KERNEL_LZ4=y
+CONFIG_HAVE_KERNEL_ZSTD=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
+# CONFIG_KERNEL_XZ is not set
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
+# CONFIG_KERNEL_ZSTD is not set
+CONFIG_DEFAULT_INIT=""
+CONFIG_DEFAULT_HOSTNAME="(none)"
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
+# CONFIG_WATCH_QUEUE is not set
+CONFIG_CROSS_MEMORY_ATTACH=y
+CONFIG_USELIB=y
+CONFIG_AUDIT=y
+CONFIG_HAVE_ARCH_AUDITSYSCALL=y
+CONFIG_AUDITSYSCALL=y
+
+#
+# IRQ subsystem
+#
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_IRQ_SHOW=y
+CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_GENERIC_IRQ_MIGRATION=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_IRQ_DOMAIN=y
+CONFIG_IRQ_DOMAIN_HIERARCHY=y
+CONFIG_GENERIC_MSI_IRQ=y
+CONFIG_GENERIC_MSI_IRQ_DOMAIN=y
+CONFIG_IRQ_MSI_IOMMU=y
+CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y
+CONFIG_GENERIC_IRQ_RESERVATION_MODE=y
+CONFIG_IRQ_FORCED_THREADING=y
+CONFIG_SPARSE_IRQ=y
+# CONFIG_GENERIC_IRQ_DEBUGFS is not set
+# end of IRQ subsystem
+
+CONFIG_CLOCKSOURCE_WATCHDOG=y
+CONFIG_ARCH_CLOCKSOURCE_INIT=y
+CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
+CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y
+CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y
+
+#
+# Timers subsystem
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ_COMMON=y
+# CONFIG_HZ_PERIODIC is not set
+CONFIG_NO_HZ_IDLE=y
+# CONFIG_NO_HZ_FULL is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+# end of Timers subsystem
+
+CONFIG_BPF=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+
+#
+# BPF subsystem
+#
+CONFIG_BPF_SYSCALL=y
+# CONFIG_BPF_JIT is not set
+# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
+# CONFIG_BPF_PRELOAD is not set
+# end of BPF subsystem
+
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_PREEMPT is not set
+CONFIG_PREEMPT_COUNT=y
+
+#
+# CPU/Task time and stats accounting
+#
+CONFIG_TICK_CPU_ACCOUNTING=y
+# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
+# CONFIG_IRQ_TIME_ACCOUNTING is not set
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_PSI is not set
+# end of CPU/Task time and stats accounting
+
+# CONFIG_CPU_ISOLATION is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_TREE_RCU=y
+# CONFIG_RCU_EXPERT is not set
+CONFIG_SRCU=y
+CONFIG_TREE_SRCU=y
+CONFIG_TASKS_RCU_GENERIC=y
+CONFIG_TASKS_RUDE_RCU=y
+CONFIG_TASKS_TRACE_RCU=y
+CONFIG_RCU_STALL_COMMON=y
+CONFIG_RCU_NEED_SEGCBLIST=y
+# end of RCU Subsystem
+
+CONFIG_BUILD_BIN2C=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_IKHEADERS is not set
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13
+CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
+
+#
+# Scheduler features
+#
+# CONFIG_UCLAMP_TASK is not set
+# end of Scheduler features
+
+CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
+CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
+CONFIG_CC_HAS_INT128=y
+CONFIG_ARCH_SUPPORTS_INT128=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
+CONFIG_CGROUPS=y
+CONFIG_PAGE_COUNTER=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_WRITEBACK=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_CGROUP_PIDS=y
+# CONFIG_CGROUP_RDMA is not set
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_C