Re: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

2023-08-28 Thread Alex Deucher
Acked-by: Alex Deucher 

On Fri, Aug 25, 2023 at 4:10 PM David Francis  wrote:
>
> The code in kfd_mqd_manager_v11.c to support criu dump and
> restore of queue state was missing.
>
> Added it; should be equivalent to kfd_mqd_manager_v10.c.
>
> CC: Felix Kuehling 
> Signed-off-by: David Francis 
> ---
>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 41 +++
>  1 file changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> index 2319467d2d95..2a79d37da95d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
> @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void 
> *mqd,
> return 0;
>  }
>
> +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, 
> void *ctl_stack_dst)
> +{
> +   struct v11_compute_mqd *m;
> +
> +   m = get_mqd(mqd);
> +
> +   memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
> +}
> +
> +static void restore_mqd(struct mqd_manager *mm, void **mqd,
> +   struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> +   struct queue_properties *qp,
> +   const void *mqd_src,
> +   const void *ctl_stack_src, const u32 ctl_stack_size)
> +{
> +   uint64_t addr;
> +   struct v11_compute_mqd *m;
> +
> +   m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
> +   addr = mqd_mem_obj->gpu_addr;
> +
> +   memcpy(m, mqd_src, sizeof(*m));
> +
> +   *mqd = m;
> +   if (gart_addr)
> +   *gart_addr = addr;
> +
> +   m->cp_hqd_pq_doorbell_control =
> +   qp->doorbell_off <<
> +   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
> +   pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
> +   m->cp_hqd_pq_doorbell_control);
> +
> +   qp->is_active = 0;
> +}
> +
> +
>  static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
> struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> struct queue_properties *q)
> @@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum 
> KFD_MQD_TYPE type,
> mqd->is_occupied = kfd_is_occupied_cp;
> mqd->mqd_size = sizeof(struct v11_compute_mqd);
> mqd->get_wave_state = get_wave_state;
> +   mqd->checkpoint_mqd = checkpoint_mqd;
> +   mqd->restore_mqd = restore_mqd;
>  #if defined(CONFIG_DEBUG_FS)
> mqd->debugfs_show_mqd = debugfs_show_mqd;
>  #endif
> @@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum 
> KFD_MQD_TYPE type,
> mqd->update_mqd = update_mqd_sdma;
> mqd->destroy_mqd = kfd_destroy_mqd_sdma;
> mqd->is_occupied = kfd_is_occupied_sdma;
> +   mqd->checkpoint_mqd = checkpoint_mqd;
> +   mqd->restore_mqd = restore_mqd;
> mqd->mqd_size = sizeof(struct v11_sdma_mqd);
>  #if defined(CONFIG_DEBUG_FS)
> mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
> --
> 2.34.1
>


RE: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

2023-08-28 Thread Kasiviswanathan, Harish
[AMD Official Use Only - General]

Reviewed-by: Harish Kasiviswanathan 

-----Original Message-----
From: amd-gfx  On Behalf Of David Francis
Sent: Friday, August 25, 2023 3:14 PM
To: amd-gfx@lists.freedesktop.org
Cc: Francis, David ; Kuehling, Felix 

Subject: [PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

The code in kfd_mqd_manager_v11.c to support criu dump and
restore of queue state was missing.

Added it; should be equivalent to kfd_mqd_manager_v10.c.

CC: Felix Kuehling 
Signed-off-by: David Francis 
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 41 +++
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 2319467d2d95..2a79d37da95d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
 }

+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+   struct v11_compute_mqd *m;
+
+   m = get_mqd(mqd);
+
+   memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+   struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+   struct queue_properties *qp,
+   const void *mqd_src,
+   const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+   uint64_t addr;
+   struct v11_compute_mqd *m;
+
+   m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+   addr = mqd_mem_obj->gpu_addr;
+
+   memcpy(m, mqd_src, sizeof(*m));
+
+   *mqd = m;
+   if (gart_addr)
+   *gart_addr = addr;
+
+   m->cp_hqd_pq_doorbell_control =
+   qp->doorbell_off <<
+   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+   pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+   m->cp_hqd_pq_doorbell_control);
+
+   qp->is_active = 0;
+}
+
+
 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
--
2.34.1



[PATCH] drm/amdkfd: Checkpoint and restore queues on GFX11

2023-08-25 Thread David Francis
The code in kfd_mqd_manager_v11.c to support criu dump and
restore of queue state was missing.

Added it; should be equivalent to kfd_mqd_manager_v10.c.

CC: Felix Kuehling 
Signed-off-by: David Francis 
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 41 +++
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 2319467d2d95..2a79d37da95d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
 }
 
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+   struct v11_compute_mqd *m;
+
+   m = get_mqd(mqd);
+
+   memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+   struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+   struct queue_properties *qp,
+   const void *mqd_src,
+   const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+   uint64_t addr;
+   struct v11_compute_mqd *m;
+
+   m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+   addr = mqd_mem_obj->gpu_addr;
+
+   memcpy(m, mqd_src, sizeof(*m));
+
+   *mqd = m;
+   if (gart_addr)
+   *gart_addr = addr;
+
+   m->cp_hqd_pq_doorbell_control =
+   qp->doorbell_off <<
+   CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+   pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+   m->cp_hqd_pq_doorbell_control);
+
+   qp->is_active = 0;
+}
+
+
 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -457,6 +494,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -500,6 +539,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
+   mqd->checkpoint_mqd = checkpoint_mqd;
+   mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
-- 
2.34.1