Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue

2023-02-06 Thread Christian König

Am 07.02.23 um 08:51 schrieb Shashank Sharma:


On 07/02/2023 08:14, Christian König wrote:

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

The FW expects us to allocate at least one page of context space to
process gang, process, shadow, GDS and FW_space related work. This
patch creates objects for these, and adds IP-specific functions to
do so.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  18 +++
  3 files changed, 171 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 9f3490a91776..18281b3a51f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static void
+amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+   struct amdgpu_usermode_queue *queue)
+{
+    uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
+}
+
+static int
+amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+  struct amdgpu_usermode_queue *queue)
+{
+    int r;
+
+    r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to create context space for queue\n");
+    return r;
+    }
+
+    return 0;
+}
+
  static int
  amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
  {
@@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
  goto free_qid;
  }
  +    r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to create context space\n");
+    goto free_mqd;
+    }
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  +free_mqd:
+    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
+
  free_qid:
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  }
    mutex_lock(&uq_mgr->userq_mutex);
+    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  list_del(&queue->userq_node);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
index 57889729d635..687f90a587e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_
    }
  +static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr *uq_mgr,
+   struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
+    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
+    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
+
+    /*
+ * The FW expects atleast one page space allocated for
+ * process context related work, and one for gang context.
+ */
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_VRAM,
+    &pctx->obj,
+    &pctx->gpu_addr,
+    &pctx->cpu_ptr);


Again, don't use amdgpu_bo_create_kernel() for any of this.

Noted,



+    if (r) {
+    DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+    return r;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_VRAM,
+    &gctx->obj,
+    &gctx->gpu_addr,
+    &gctx->cpu_ptr);
+    if (r) {
+    DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", r);
+    goto err_gangctx;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_VRAM,
+    &gdsctx->obj,
+    &gdsctx->gpu_addr,
+    &gdsctx->cpu_ptr);
+    if (r) {
+    DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)

Re: [PATCH 6/8] drm/amdgpu: Map userqueue into HW

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:20, Christian König wrote:



Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds new fptrs to prepare the usermode queue to be
mapped into or unmapped from the HW. Once mapped, the queue
will be ready to accept workloads.

V1: Addressed review comments from Alex on the RFC patch series
 - Map/Unmap should be IP specific.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 47 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  2 +
  3 files changed, 106 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 18281b3a51f1..cbfe2608c040 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,53 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static void
+amdgpu_userqueue_unmap(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct mes_remove_queue_input remove_request;
+
+    uq_mgr->userq_mqd_funcs->prepare_unmap(uq_mgr, queue, (void *)&remove_request);
+
+    amdgpu_mes_lock(&adev->mes);
+    r = adev->mes.funcs->remove_hw_queue(&adev->mes, &remove_request);
+    amdgpu_mes_unlock(&adev->mes);
+    if (r) {
+    DRM_ERROR("Failed to unmap usermode queue %d\n", 
queue->queue_id);

+    return;
+    }
+
+    DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct mes_add_queue_input add_request;
+
+    r = uq_mgr->userq_mqd_funcs->prepare_map(uq_mgr, queue, (void *)&add_request);
+    if (r) {
+    DRM_ERROR("Failed to map userqueue\n");
+    return r;
+    }
+
+    amdgpu_mes_lock(&adev->mes);
+    r = adev->mes.funcs->add_hw_queue(&adev->mes, &add_request);
+    amdgpu_mes_unlock(&adev->mes);
+    if (r) {
+    DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+    return r;
+    }
+
+    DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", 
queue->queue_id);

+    return 0;
+}
+
  static void
  amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
 struct amdgpu_usermode_queue *queue)
@@ -170,12 +217,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
  goto free_mqd;
  }
  +    r = amdgpu_userqueue_map(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to map userqueue\n");
+    goto free_ctx;
+    }
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  +free_ctx:
+    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
+
  free_mqd:
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  @@ -201,6 +257,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  }
    mutex_lock(&uq_mgr->userq_mutex);
+    amdgpu_userqueue_unmap(uq_mgr, queue);
  amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
index 687f90a587e3..d317bb600fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -24,6 +24,7 @@
  #include "amdgpu_userqueue.h"
  #include "v11_structs.h"
  #include "amdgpu_mes.h"
+#include "mes_api_def.h"
  #include "gc/gc_11_0_0_offset.h"
  #include "gc/gc_11_0_0_sh_mask.h"
  @@ -239,6 +240,50 @@ static void amdgpu_userq_gfx_v11_ctx_destroy(struct amdgpu_userq_mgr *uq_mgr,
    &pctx->cpu_ptr);
  }
  +static int
+amdgpu_userq_gfx_v11_prepare_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ void *q_input)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct mes_add_queue_input *queue_input = q_input;
+
+    memset(queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+    queue_input->process_va_start = 0;
+    queue_input->process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
+    queue_input->process_quantum = 10; /* 10ms */
+    queue_input->gang_quantum = 1; /* 1ms */
+    queue_input->paging = false;
+
+    queue_input->gang_context_addr = queue->gang_ctx.gpu_addr;
+    queue_input->pro
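
As with the context hooks in patch 5, the map/unmap path adds two more IP-specific entries to the amdgpu_userq_mqd_funcs table. A rough sketch of those additions, inferred from the call sites above (the exact prototypes, and whether prepare_unmap returns a value, are not visible in this excerpt):

    /* sketch: hooks that fill the MES request structures for this IP */
    int (*prepare_map)(struct amdgpu_userq_mgr *uq_mgr,
                       struct amdgpu_usermode_queue *queue,
                       void *q_input);   /* fills a struct mes_add_queue_input */
    int (*prepare_unmap)(struct amdgpu_userq_mgr *uq_mgr,
                         struct amdgpu_usermode_queue *queue,
                         void *q_input); /* fills a struct mes_remove_queue_input */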

Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:14, Christian König wrote:

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

The FW expects us to allocate at least one page of context space to
process gang, process, shadow, GDS and FW_space related work. This
patch creates objects for these, and adds IP-specific functions to
do so.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  18 +++
  3 files changed, 171 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 9f3490a91776..18281b3a51f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static void
+amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+   struct amdgpu_usermode_queue *queue)
+{
+    uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
+}
+
+static int
+amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+  struct amdgpu_usermode_queue *queue)
+{
+    int r;
+
+    r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to create context space for queue\n");
+    return r;
+    }
+
+    return 0;
+}
+
  static int
  amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)

  {
@@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct 
drm_file *filp, union drm_amdgpu_userq

  goto free_qid;
  }
  +    r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to create context space\n");
+    goto free_mqd;
+    }
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  +free_mqd:
+    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
+
  free_qid:
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct 
drm_file *filp, int queue_id)

  }
    mutex_lock(&uq_mgr->userq_mutex);
+    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  list_del(&queue->userq_node);
diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c

index 57889729d635..687f90a587e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct 
amdgpu_userq_mgr *uq_mgr, struct amdgpu_

    }
  +static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr 
*uq_mgr,
+   struct 
amdgpu_usermode_queue *queue)

+{
+    int r;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
+    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
+    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
+
+    /*
+ * The FW expects atleast one page space allocated for
+ * process context related work, and one for gang context.
+ */
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, 
PAGE_SIZE,

+    AMDGPU_GEM_DOMAIN_VRAM,
+    &pctx->obj,
+    &pctx->gpu_addr,
+    &pctx->cpu_ptr);


Again, don't use amdgpu_bo_create_kernel() for any of this.

Noted,



+    if (r) {
+    DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+    return r;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, 
PAGE_SIZE,

+    AMDGPU_GEM_DOMAIN_VRAM,
+    &gctx->obj,
+    &gctx->gpu_addr,
+    &gctx->cpu_ptr);
+    if (r) {
+    DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", r);
+    goto err_gangctx;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, 
PAGE_SIZE,

+    AMDGPU_GEM_DOMAIN_VRAM,
+    &gdsctx->obj,
+    &gdsctx->gpu_addr,
+    &gdsctx->cpu_ptr);
+    if (r) {
+    DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
+    goto err_gdsctx;
+    }
+
+    r =
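
The error-unwind labels referenced above (err_gangctx, err_gdsctx, ...) are cut off in this excerpt. The cleanup normally mirrors the allocations in reverse order; a sketch of what that ladder would look like with the helpers used in this series (purely illustrative, the real labels are in the full patch):

err_gdsctx:
    amdgpu_bo_free_kernel(&gctx->obj, &gctx->gpu_addr, &gctx->cpu_ptr);
err_gangctx:
    amdgpu_bo_free_kernel(&pctx->obj, &pctx->gpu_addr, &pctx->cpu_ptr);
    return r;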

Re: [PATCH 3/8] drm/amdgpu: introduce userqueue MQD handlers

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:11, Christian König wrote:

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

A memory queue descriptor (MQD) of a userqueue defines it in the hardware's
context. As the method of forming an MQD, and its format, can vary between
different graphics IPs, we need gfx-generation-specific handlers to create MQDs.


This patch:
- Introduces MQD handler functions for the usermode queues
- A general function to create and destroy MQD for a userqueue.

V1: Worked on review comments from Alex on RFC patches:
 MQD creation should be gen and IP specific.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 64 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  9 +++
  2 files changed, 73 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index d5bc7fe81750..625c2fe1e84a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,60 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static int
+amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)

+{
+    int r;
+    int size;
+    struct amdgpu_device *adev = uq_mgr->adev;
+
+    if (!uq_mgr->userq_mqd_funcs) {
+    DRM_ERROR("Userqueue not initialized\n");
+    return -EINVAL;
+    }
+
+    size = uq_mgr->userq_mqd_funcs->mqd_size(uq_mgr);
+    r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_VRAM,
+    &queue->mqd_obj,
+    &queue->mqd_gpu_addr,
+    &queue->mqd_cpu_ptr);


We can't use amdgpu_bo_create_kernel() here, this pins the BO.

Instead all BOs of the process must be fenced with some eviction fence.



Noted,

- Shashank



Christian.


+    if (r) {
+    DRM_ERROR("Failed to allocate bo for userqueue (%d)", r);
+    return r;
+    }
+
+    memset(queue->mqd_cpu_ptr, 0, size);
+    r = amdgpu_bo_reserve(queue->mqd_obj, false);
+    if (unlikely(r != 0)) {
+    DRM_ERROR("Failed to reserve mqd for userqueue (%d)", r);
+    goto free_mqd;
+    }
+
+    r = uq_mgr->userq_mqd_funcs->mqd_create(uq_mgr, queue);
+    amdgpu_bo_unreserve(queue->mqd_obj);
+    if (r) {
+    DRM_ERROR("Failed to create MQD for queue\n");
+    goto free_mqd;
+    }
+    return 0;
+
+free_mqd:
+    amdgpu_bo_free_kernel(&queue->mqd_obj,
+   &queue->mqd_gpu_addr,
+   &queue->mqd_cpu_ptr);
+   return r;
+}
+
+static void
+amdgpu_userqueue_destroy_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)

+{
+    uq_mgr->userq_mqd_funcs->mqd_destroy(uq_mgr, queue);
+    amdgpu_bo_free_kernel(&queue->mqd_obj,
+   &queue->mqd_gpu_addr,
+   &queue->mqd_cpu_ptr);
+}
+
  static int amdgpu_userqueue_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)

  {
  int r, pasid;
@@ -82,12 +136,21 @@ static int amdgpu_userqueue_create(struct 
drm_file *filp, union drm_amdgpu_userq

  goto free_queue;
  }
  +    r = amdgpu_userqueue_create_mqd(uq_mgr, queue);
+    if (r) {
+    DRM_ERROR("Failed to create MQD\n");
+    goto free_qid;
+    }
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  +free_qid:
+    amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
+
  free_queue:
  mutex_unlock(&uq_mgr->userq_mutex);
  kfree(queue);
@@ -107,6 +170,7 @@ static void amdgpu_userqueue_destroy(struct 
drm_file *filp, int queue_id)

  }
    mutex_lock(&uq_mgr->userq_mutex);
+    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  list_del(&queue->userq_node);
  mutex_unlock(&uq_mgr->userq_mutex);
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h

index 9557588fe34f..a6abdfd5cb74 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -26,10 +26,13 @@
    #define AMDGPU_MAX_USERQ 512
  +struct amdgpu_userq_mqd_funcs;
+
  struct amdgpu_userq_mgr {
  struct idr userq_idr;
  struct mutex userq_mutex;
  struct list_head userq_list;
+    const struct amdgpu_userq_mqd_funcs *userq_mqd_funcs;
  struct amdgpu_device *adev;
  };
  @@ -57,6 +60,12 @@ struct amdgpu_usermode_queue {
    int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct 
drm_file *filp);

  +struct amdgpu_userq_mqd_funcs {
+    int (*mqd_size)(struct amdgpu_userq_mgr *);
+    int (*mqd_create)(struct amdgpu_userq_mgr *, struct 
amdgpu_usermode_queue *);
+    void (*mqd_destroy)(struc
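
The struct definition is cut off here. For orientation, the IP-specific side of this interface is a table of GFX v11 handlers that the userqueue manager points at. A rough sketch of such a table (only amdgpu_userq_gfx_v11_mqd_destroy is actually named in these excerpts; the other handler names and the registration site are assumptions following the series' naming):

static const struct amdgpu_userq_mqd_funcs userq_mqd_gfx_v11_funcs = {
    .mqd_size    = amdgpu_userq_gfx_v11_mqd_size,    /* name assumed */
    .mqd_create  = amdgpu_userq_gfx_v11_mqd_create,  /* name assumed */
    .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
};

/* somewhere in the IP-specific init path (sketch): */
uq_mgr->userq_mqd_funcs = &userq_mqd_gfx_v11_funcs;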

Re: [PATCH 2/8] drm/amdgpu: add usermode queues

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:08, Christian König wrote:

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds skeleton code for usermode queue creation. It
contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate a unique index for the queue.
- A queue context manager in driver private data.

V1: Worked on design review comments from RFC patch series:
(https://patchwork.freedesktop.org/series/112214/)

- Alex: Keep a list of queues, instead of single queue per process.
- Christian: Use the queue manager instead of global ptrs,
    Don't keep the queue structure in amdgpu_ctx

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   5 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 155 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  64 
  6 files changed, 230 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_userqueue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index 798d0e9a60b7..764801cc8203 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -210,6 +210,8 @@ amdgpu-y += \
  # add amdkfd interfaces
  amdgpu-y += amdgpu_amdkfd.o
  +# add usermode queue
+amdgpu-y += amdgpu_userqueue.o
    ifneq ($(CONFIG_HSA_AMD),)
  AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 6b74df446694..0625d6bdadf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,6 +109,7 @@
  #include "amdgpu_fdinfo.h"
  #include "amdgpu_mca.h"
  #include "amdgpu_ras.h"
+#include "amdgpu_userqueue.h"
    #define MAX_GPU_INSTANCE    16
  @@ -482,6 +483,7 @@ struct amdgpu_fpriv {
  struct mutex    bo_list_lock;
  struct idr    bo_list_handles;
  struct amdgpu_ctx_mgr    ctx_mgr;
+    struct amdgpu_userq_mgr    userq_mgr;
  };
    int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv 
**fpriv);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index b4f2d61ea0d5..229976a2d0e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -52,6 +52,7 @@
  #include "amdgpu_ras.h"
  #include "amdgpu_xgmi.h"
  #include "amdgpu_reset.h"
+#include "amdgpu_userqueue.h"
    /*
   * KMS wrapper.
@@ -2748,6 +2749,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] 
= {
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+    DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),

  };
    static const struct drm_driver amdgpu_kms_driver = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 7aa7e52ca784..52e61e339a88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1187,6 +1187,10 @@ int amdgpu_driver_open_kms(struct drm_device 
*dev, struct drm_file *file_priv)

    amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
  +    r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev);
+    if (r)
+    DRM_WARN("Can't setup usermode queues, only legacy workload 
submission will work\n");

+
  file_priv->driver_priv = fpriv;
  goto out_suspend;
  @@ -1254,6 +1258,7 @@ void amdgpu_driver_postclose_kms(struct 
drm_device *dev,

    amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
  amdgpu_vm_fini(adev, &fpriv->vm);
+    amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
    if (pasid)
  amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..d5bc7fe81750
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom 
the

+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The 
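
The rest of the new file is cut off here. For orientation, the IOCTL entry point declared in the header, amdgpu_userq_ioctl(), plausibly just dispatches on the op field to the create/destroy helpers shown elsewhere in this series; the actual body is not visible in this excerpt, so the following is only a sketch:

int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
    union drm_amdgpu_userq *args = data;
    int r;

    switch (args->in.op) {
    case AMDGPU_USERQ_OP_CREATE:
        r = amdgpu_userqueue_create(filp, args);
        break;
    case AMDGPU_USERQ_OP_FREE:
        amdgpu_userqueue_destroy(filp, args->in.queue_id);
        r = 0;
        break;
    default:
        r = -EINVAL;
        break;
    }
    return r;
}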

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Shashank Sharma



On 07/02/2023 08:03, Christian König wrote:

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:
On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma 
 wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:
On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma 
 wrote:

From: Alex Deucher 

This patch introduces a new UAPI/IOCTL for usermode graphics
queues. The userspace app will fill this structure and request
that the graphics driver add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into the GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
    include/uapi/drm/amdgpu_drm.h | 53 +++
    1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
    #define DRM_AMDGPU_VM  0x13
    #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
    #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

    #define DRM_IOCTL_AMDGPU_GEM_CREATE 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union 
drm_amdgpu_gem_create)
    #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE 
+ DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)

@@ -71,6 +72,7 @@ extern "C" {
    #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
    #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union 
drm_amdgpu_fence_to_handle)
    #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)


    /**
 * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
   union drm_amdgpu_ctx_out out;
    };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.
Can you please help to cross-check this information? All the existing
kernel doorbell calculations keep the doorbell size as sizeof(u32).

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead, this is the offset into the BO at which to find the doorbell for
this queue (which in turn is 64 bits wide).

Since we will probably never have more than 4GiB of doorbells, we should be
pretty safe using 32 bits here.
Yes, the offset would still be 32 bits, but the units would be 
qwords.  E.g.,


+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.


Ah, so you only wanted to fix the comment. That was absolutely not 
clear from the discussion.


If I understand this correctly, the offset of the doorbell in the BO is 
still 32-bit, but its width (size in bytes) is 64 bits. Am I getting 
that right?


- Shashank
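
To make the arithmetic in this sub-thread concrete: the offset stays a 32-bit index into the doorbell BO, while each doorbell slot it addresses is 64 bits wide. With qword-indexed offsets (Alex's suggestion) two queues can never pick overlapping slots. A purely illustrative sketch, where doorbell_bo_cpu_addr is an assumed CPU mapping of the doorbell BO:

/* dword-indexed (current comment): byte offset = doorbell_offset * 4
 * qword-indexed (proposed):        byte offset = doorbell_offset * 8 */
u64 *doorbell_cpu = doorbell_bo_cpu_addr;             /* assumed mapping */
u64 *queue_db = &doorbell_cpu[mqd.doorbell_offset];   /* one 64-bit slot per queue */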



Christian.



Alex


Christian.


Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+    * unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to 

Re: [PATCH] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes

2023-02-06 Thread Christian König

That sounds like a good idea to me as well.

If you think that a patch should be backported, please add a "CC: 
sta...@vger.kernel.org" tag to it before sending it out.

We can always remove it if we don't think a backport is appropriate, 
but maintainers seldom add it by themselves.


Thanks,
Christian.

Am 07.02.23 um 00:09 schrieb Friedrich Vock:

Hi,

thanks for applying the patch!

Do you think it'd also be possible to backport it to previous kernel
versions or do you already plan to do that?
Since it is a one-liner bugfix it shouldn't be too hard to backport.

Thank you,
Friedrich Vock

On 06.02.23 21:26, Alex Deucher wrote:

Applied.  Thanks!

Alex

On Mon, Feb 6, 2023 at 3:35 AM Christian König 
 wrote:



Am 02.02.23 um 17:21 schrieb Friedrich Vock:

The pid field corresponds to the result of gettid() in userspace.
However, userspace cannot reliably attribute PTE events to processes
with just the thread id. This patch allows userspace to easily
attribute PTE update events to specific processes by comparing this
field with the result of getpid().

For attributing events to specific threads, the thread id is also
contained in the common fields of each trace event.

Signed-off-by: Friedrich Vock 

Ah, yes that makes more sense. Reviewed-by: Christian König


Alex do you pick this up or should I take care of it?

Thanks,
Christian.


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index b5f3bba851db..01e42bdd8e4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct 
amdgpu_vm_update_params *params,
   trace_amdgpu_vm_update_ptes(params, 
frag_start, upd_end,

min(nptes, 32u), dst, incr,
upd_flags,
- vm->task_info.pid,
+ vm->task_info.tgid,
vm->immediate.fence_context);
   amdgpu_vm_pte_update_flags(params, 
to_amdgpu_bo_vm(pt),

cursor.level, pe_start, dst,
--
2.39.1





Re: [PATCH 6/8] drm/amdgpu: Map userqueue into HW

2023-02-06 Thread Christian König




Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds new fptrs to prepare the usermode queue to be
mapped into or unmapped from the HW. Once mapped, the queue
will be ready to accept workloads.

V1: Addressed review comments from Alex on the RFC patch series
 - Map/Unmap should be IP specific.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 47 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  2 +
  3 files changed, 106 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 18281b3a51f1..cbfe2608c040 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,53 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  
+static void

+amdgpu_userqueue_unmap(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct amdgpu_device *adev = uq_mgr->adev;
+struct mes_remove_queue_input remove_request;
+
+uq_mgr->userq_mqd_funcs->prepare_unmap(uq_mgr, queue, (void 
*)&remove_request);
+
+amdgpu_mes_lock(&adev->mes);
+r = adev->mes.funcs->remove_hw_queue(&adev->mes, &remove_request);
+amdgpu_mes_unlock(&adev->mes);
+if (r) {
+DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
+return;
+}
+
+DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct amdgpu_device *adev = uq_mgr->adev;
+struct mes_add_queue_input add_request;
+
+r = uq_mgr->userq_mqd_funcs->prepare_map(uq_mgr, queue, (void 
*)&add_request);
+if (r) {
+DRM_ERROR("Failed to map userqueue\n");
+return r;
+}
+
+amdgpu_mes_lock(&adev->mes);
+r = adev->mes.funcs->add_hw_queue(&adev->mes, &add_request);
+amdgpu_mes_unlock(&adev->mes);
+if (r) {
+DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+return r;
+}
+
+DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
+return 0;
+}
+
  static void
  amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
 struct amdgpu_usermode_queue *queue)
@@ -170,12 +217,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq
  goto free_mqd;
  }
  
+r = amdgpu_userqueue_map(uq_mgr, queue);

+if (r) {
+DRM_ERROR("Failed to map userqueue\n");
+goto free_ctx;
+}
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  
+free_ctx:

+amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
+
  free_mqd:
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  
@@ -201,6 +257,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)

  }
  
  mutex_lock(&uq_mgr->userq_mutex);

+amdgpu_userqueue_unmap(uq_mgr, queue);
  amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
index 687f90a587e3..d317bb600fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -24,6 +24,7 @@
  #include "amdgpu_userqueue.h"
  #include "v11_structs.h"
  #include "amdgpu_mes.h"
+#include "mes_api_def.h"
  #include "gc/gc_11_0_0_offset.h"
  #include "gc/gc_11_0_0_sh_mask.h"
  
@@ -239,6 +240,50 @@ static void amdgpu_userq_gfx_v11_ctx_destroy(struct amdgpu_userq_mgr *uq_mgr,

&pctx->cpu_ptr);
  }
  
+static int

+amdgpu_userq_gfx_v11_prepare_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ void *q_input)
+{
+struct amdgpu_device *adev = uq_mgr->adev;
+struct mes_add_queue_input *queue_input = q_input;
+
+memset(queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+queue_input->process_va_start = 0;
+queue_input->process_va_end = (adev->vm_manager.max_pfn - 1) << 
AMDGPU_GPU_PAGE_SHIFT;
+queue_input->process_quantum = 10; /* 10ms */
+queue_input->gang_quantum = 1; /* 1ms */
+queue_input->paging = false;
+
+queue_input->gang_context_addr = queue->gang_ctx.gpu_addr;
+queue_input->process_context_addr = queue->proc_ctx.gpu_addr;
+ 

Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue

2023-02-06 Thread Christian König

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

The FW expects us to allocate at least one page of context space to
process gang, process, shadow, GDS and FW_space related work. This
patch creates objects for these, and adds IP-specific functions to
do so.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  18 +++
  3 files changed, 171 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 9f3490a91776..18281b3a51f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  
+static void

+amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+   struct amdgpu_usermode_queue *queue)
+{
+uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
+}
+
+static int
+amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+  struct amdgpu_usermode_queue *queue)
+{
+int r;
+
+r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
+if (r) {
+DRM_ERROR("Failed to create context space for queue\n");
+return r;
+}
+
+return 0;
+}
+
  static int
  amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)
  {
@@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq
  goto free_qid;
  }
  
+r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);

+if (r) {
+DRM_ERROR("Failed to create context space\n");
+goto free_mqd;
+}
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  
+free_mqd:

+amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
+
  free_qid:
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  
@@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)

  }
  
  mutex_lock(&uq_mgr->userq_mutex);

+amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
  amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  list_del(&queue->userq_node);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
index 57889729d635..687f90a587e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct amdgpu_userq_mgr 
*uq_mgr, struct amdgpu_
  
  }
  
+static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr *uq_mgr,

+   struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct amdgpu_device *adev = uq_mgr->adev;
+struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
+struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
+struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
+
+/*
+ * The FW expects atleast one page space allocated for
+ * process context related work, and one for gang context.
+ */
+r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&pctx->obj,
+&pctx->gpu_addr,
+&pctx->cpu_ptr);


Again, don't use amdgpu_bo_create_kernel() for any of this.


+if (r) {
+DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+return r;
+}
+
+r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&gctx->obj,
+&gctx->gpu_addr,
+&gctx->cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", r);
+goto err_gangctx;
+}
+
+r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&gdsctx->obj,
+&gdsctx->gpu_addr,
+&gdsctx->cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
+goto err_gdsctx;
+}
+
+r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, PAGE_

Re: [PATCH 3/8] drm/amdgpu: introduce userqueue MQD handlers

2023-02-06 Thread Christian König

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

A memory queue descriptor (MQD) of a userqueue defines it in the hardware's
context. As the method of forming an MQD, and its format, can vary between
different graphics IPs, we need gfx-generation-specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues
- A general function to create and destroy MQD for a userqueue.

V1: Worked on review comments from Alex on RFC patches:
 MQD creation should be gen and IP specific.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 64 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  9 +++
  2 files changed, 73 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index d5bc7fe81750..625c2fe1e84a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,60 @@ static struct amdgpu_usermode_queue
  return idr_find(&uq_mgr->userq_idr, qid);
  }
  
+static int

+amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)
+{
+int r;
+int size;
+struct amdgpu_device *adev = uq_mgr->adev;
+
+if (!uq_mgr->userq_mqd_funcs) {
+DRM_ERROR("Userqueue not initialized\n");
+return -EINVAL;
+}
+
+size = uq_mgr->userq_mqd_funcs->mqd_size(uq_mgr);
+r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+&queue->mqd_obj,
+&queue->mqd_gpu_addr,
+&queue->mqd_cpu_ptr);


We can't use amdgpu_bo_create_kernel() here, this pins the BO.

Instead all BOs of the process must be fenced with some eviction fence.

Christian.


+if (r) {
+DRM_ERROR("Failed to allocate bo for userqueue (%d)", r);
+return r;
+}
+
+memset(queue->mqd_cpu_ptr, 0, size);
+r = amdgpu_bo_reserve(queue->mqd_obj, false);
+if (unlikely(r != 0)) {
+DRM_ERROR("Failed to reserve mqd for userqueue (%d)", r);
+goto free_mqd;
+}
+
+r = uq_mgr->userq_mqd_funcs->mqd_create(uq_mgr, queue);
+amdgpu_bo_unreserve(queue->mqd_obj);
+if (r) {
+DRM_ERROR("Failed to create MQD for queue\n");
+goto free_mqd;
+}
+return 0;
+
+free_mqd:
+amdgpu_bo_free_kernel(&queue->mqd_obj,
+  &queue->mqd_gpu_addr,
+  &queue->mqd_cpu_ptr);
+   return r;
+}
+
+static void
+amdgpu_userqueue_destroy_mqd(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)
+{
+uq_mgr->userq_mqd_funcs->mqd_destroy(uq_mgr, queue);
+amdgpu_bo_free_kernel(&queue->mqd_obj,
+  &queue->mqd_gpu_addr,
+  &queue->mqd_cpu_ptr);
+}
+
  static int amdgpu_userqueue_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
  {
  int r, pasid;
@@ -82,12 +136,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq
  goto free_queue;
  }
  
+r = amdgpu_userqueue_create_mqd(uq_mgr, queue);

+if (r) {
+DRM_ERROR("Failed to create MQD\n");
+goto free_qid;
+}
+
  list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  
+free_qid:

+amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
+
  free_queue:
  mutex_unlock(&uq_mgr->userq_mutex);
  kfree(queue);
@@ -107,6 +170,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, 
int queue_id)
  }
  
  mutex_lock(&uq_mgr->userq_mutex);

+amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
  amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  list_del(&queue->userq_node);
  mutex_unlock(&uq_mgr->userq_mutex);
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index 9557588fe34f..a6abdfd5cb74 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -26,10 +26,13 @@
  
  #define AMDGPU_MAX_USERQ 512
  
+struct amdgpu_userq_mqd_funcs;

+
  struct amdgpu_userq_mgr {
struct idr userq_idr;
struct mutex userq_mutex;
struct list_head userq_list;
+   const struct amdgpu_userq_mqd_funcs *userq_mqd_funcs;
struct amdgpu_device *adev;
  };
  
@@ -57,6 +60,12 @@ struct amdgpu_usermode_queue {
  
  int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
  
+struct amdgpu_userq_mqd_funcs {

+   int (*mqd_size)(struct amdgpu_userq_mgr *);
+   int (*mqd_create)(struct amdgpu_userq_mgr *, struct 
amdgpu_usermode_queue *);
+   void (*mqd_destroy)(struct amd

Re: [PATCH 2/8] drm/amdgpu: add usermode queues

2023-02-06 Thread Christian König

Am 03.02.23 um 22:54 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds skeleton code for usermode queue creation. It
contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate a unique index for the queue.
- A queue context manager in driver private data.

V1: Worked on design review comments from RFC patch series:
(https://patchwork.freedesktop.org/series/112214/)

- Alex: Keep a list of queues, instead of single queue per process.
- Christian: Use the queue manager instead of global ptrs,
Don't keep the queue structure in amdgpu_ctx

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   5 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 155 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  64 
  6 files changed, 230 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_userqueue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 798d0e9a60b7..764801cc8203 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -210,6 +210,8 @@ amdgpu-y += \
  # add amdkfd interfaces
  amdgpu-y += amdgpu_amdkfd.o
  
+# add usermode queue

+amdgpu-y += amdgpu_userqueue.o
  
  ifneq ($(CONFIG_HSA_AMD),)

  AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6b74df446694..0625d6bdadf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,6 +109,7 @@
  #include "amdgpu_fdinfo.h"
  #include "amdgpu_mca.h"
  #include "amdgpu_ras.h"
+#include "amdgpu_userqueue.h"
  
  #define MAX_GPU_INSTANCE		16
  
@@ -482,6 +483,7 @@ struct amdgpu_fpriv {

struct mutexbo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_userq_mgr userq_mgr;
  };
  
  int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b4f2d61ea0d5..229976a2d0e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -52,6 +52,7 @@
  #include "amdgpu_ras.h"
  #include "amdgpu_xgmi.h"
  #include "amdgpu_reset.h"
+#include "amdgpu_userqueue.h"
  
  /*

   * KMS wrapper.
@@ -2748,6 +2749,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW),
  };
  
  static const struct drm_driver amdgpu_kms_driver = {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7aa7e52ca784..52e61e339a88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1187,6 +1187,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
struct drm_file *file_priv)
  
  	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
  
+	r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev);

+   if (r)
+   DRM_WARN("Can't setup usermode queues, only legacy workload 
submission will work\n");
+
file_priv->driver_priv = fpriv;
goto out_suspend;
  
@@ -1254,6 +1258,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
  
  	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);

amdgpu_vm_fini(adev, &fpriv->vm);
+   amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
  
  	if (pasid)

amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index ..d5bc7fe81750
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyrig

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Christian König

Am 06.02.23 um 22:03 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:

From: Alex Deucher 

This patch introduces a new UAPI/IOCTL for usermode graphics
queues. The userspace app will fill this structure and request
that the graphics driver add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into the GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
include/uapi/drm/amdgpu_drm.h | 53 +++
1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
#define DRM_AMDGPU_VM  0x13
#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
#define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

#define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

/**
 * DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
   union drm_amdgpu_ctx_out out;
};

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross-check this information? All the existing
kernel doorbell calculations keep the doorbell size as sizeof(u32).

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

I think we can stick with using __u32 because this is *not* the size of
the doorbell entries.

Instead, this is the offset into the BO at which to find the doorbell for
this queue (which in turn is 64 bits wide).

Since we will probably never have more than 4GiB of doorbells, we should be
pretty safe using 32 bits here.

Yes, the offset would still be 32 bits, but the units would be qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.


Ah, so you only wanted to fix the comment. That was absolutely not clear 
from the discussion.


Christian.



Alex


Christian.


Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to match the input.

Agree.

- Shashank


Alex


+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out 
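
For completeness, a sketch of how userspace could exercise this UAPI through libdrm once it lands. The doorbell BO, the queue/rptr/wptr buffers and their GPU virtual addresses are assumed to have been created and mapped already:

union drm_amdgpu_userq args = {0};
int r;

args.in.op = AMDGPU_USERQ_OP_CREATE;
args.in.mqd.ip_type = AMDGPU_HW_IP_GFX;
args.in.mqd.doorbell_handle = doorbell_bo_handle;  /* GEM handle, assumed */
args.in.mqd.doorbell_offset = 0;
args.in.mqd.queue_va = queue_gpu_va;               /* assumed VA of the ring buffer */
args.in.mqd.queue_size = queue_size;
args.in.mqd.rptr_va = rptr_gpu_va;
args.in.mqd.wptr_va = wptr_gpu_va;

r = drmCommandWriteRead(fd, DRM_AMDGPU_USERQ, &args, sizeof(args));
if (!r)
    queue_id = args.out.q_id;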

Re: Indexing of FeatureCtrlMask for SMU13 OverDrive

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 8:17 PM Matt Coffin  wrote:
>
> Hello again,
>
> I've been working on OverDrive support for smu13, as you probably
> already know. In that endeavor, it also contains the following:
>
> 1. I've come up with a few patterns that I think will reduce the
> amount of boilerplate and SMU-specific code required to do
> implement these interfaces in the future.
> 2. Since the old pp_od_clk_voltage sysfs interface is inadequate for
> usage in setting values other than a few indexed clock/voltage settings,
> I'll likely be sending a proposed "generic" interface, where OD settings
> are exposed to userspace by ASIC-specific indexed identifiers.
>
> But, those are beside the point, for now.
>
> While picking through the existing headers, the information in
> smu_v13_0_0_pptable.h seems to not quite be in line with what I'm seeing
> coming from the card, so I'm instead focusing mainly on
> smu13_driver_if_v13_0_0.h.
>
> In the two OverDrive-related structs, OverDriveTable_t and
> OverDriveLimits_t, the FeatureCtrlMask member seems to be controlling
> which of the "features" of OverDrive would actually be in use. As of
> yet, I haven't been able to find an index of what the bits in here
> actually mean. Is there any way you could help me out with that?

I can ask tomorrow.  That said, we are working on OD support and
should have patches available soon.

Alex



>
> My best guess thus far is that they are indexed by each element of the
> OverDriveTable_t struct, but that's only a guess.
>
> For reference, here are the values I'm seeing present in each at boot
> time.
>
> Since FeatureCtrlMask is 0b1001101, the current theory is that the
> "unsupported" features would be VddGfxVmax, GfxclkFmin, GfxclkFmax. Does
> that line up with what we'd be expecting for this ASIC?
>
> Thanks in advance for any information you can provide. I really
> appreciate the work that you all do.
>
> Thanks,
> Matt
>
> OverDriveLimits:
> FeatureCtrlMask: [0x07cd, 0x07cd]
> VoltageOffsetPerZoneBoundary: [-450, 0]
> VddGfxVmax: [0, 0]
> IdlePwrSavingFeaturesCtrl: [0x00, 0x00]
> RuntimePwrSavingFeaturesCtrl: [0x00, 0x00]
> GfxclkFmin: [500, 5000]
> GfxclkFmax: [500, 5000]
> UclkFmin: [97, 1500]
> UclkFmax: [97, 1500]
> Ppt: [-10, 15], Tdc: [-10, 0]
> FanLinearPwmPoints: [23, 100]
> FanLinearTempPoints: [25, 100]
> FanMinimumPwm: [23, 100]
> AcousticTargetRpmThreshold: [500, 3200]
> AcousticLimitRpmThreshold: [500, 3200]
> FanTargetTemperature: [25, 105]
> FanZeroRpmEnable: [0, 1]
> FanZeroRpmStopTemp: [25, 100]
> FanMode: [0, 1]
> MaxOpTemp: [50, 110]
> OverDriveTable:
> FeatureCtrlMask: 0x
> VoltageOffsetPerZoneBoundary[0]: 0
> VoltageOffsetPerZoneBoundary[1]: 0
> VoltageOffsetPerZoneBoundary[2]: 0
> VoltageOffsetPerZoneBoundary[3]: 0
> VoltageOffsetPerZoneBoundary[4]: 0
> VoltageOffsetPerZoneBoundary[5]: 0
> VddGfxVmax: 1150
> IdlePwrSavingFeaturesCtrl: 0x00
> RuntimePwrSavingFeaturesCtrl: 0x00
> GfxclkFmin: 500
> GfxclkFmax: 2890
> UclkFmin: 97
> UclkFmax: 1249
> Ppt: 0
> Tdc: 0
> FanLinearPwmPoints[0]: 0
> FanLinearPwmPoints[1]: 0
> FanLinearPwmPoints[2]: 0
> FanLinearPwmPoints[3]: 0
> FanLinearPwmPoints[4]: 0
> FanLinearPwmPoints[5]: 0
> FanLinearTempPoints[0]: 0
> FanLinearTempPoints[1]: 0
> FanLinearTempPoints[2]: 0
> FanLinearTempPoints[3]: 0
> FanLinearTempPoints[4]: 0
> FanLinearTempPoints[5]: 0
> FanMinimumPwm: 35
> AcousticTargetRpmThreshold: 1250
> AcousticLimitRpmThreshold: 1500
> FanTargetTemperature: 94
> FanZeroRpmEnable: 1
> FanZeroRpmStopTemp: 55
> FanMode: 0
> MaxOpTemp: 110


Re: gpu_metrics does not provide 'current_gfxclk', 'current_uclk', 'average_cpu_power' & 'temperature_core' on AMD Ryzen 7000 CPU

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 5:48 PM sfrcorne  wrote:
>
> Dear Alex,
>
> First of all, thank you for your response. Personally, I use a Ryzen 5 7600X 
> however people with a Ryzen 9 7900X are also reporting this issue. The 
> relevant bug report in Mangohud can be found here: 
> "https://github.com/flightlessmango/MangoHud/issues/868";.
>
> I looked around a bit in both the Mangohud source code and the Linux kernel 
> source code.
>
> (Mangohud source): From what I understand, Mangohud looks for a file 
> "/sys/class/drm/card*/device/gpu_metrics". If this file exists (and it does 
> exists on my machine), it tries to read this file and extract the relevant 
> GPU data (and in case of an APU also the CPU data) from it (these are the 
> values I was talking about in my previous mail). When the file 
> "/sys/class/drm/card*/device/gpu_metrics" exists, it will not use the data 
> provided by hwmon (/sys/class/hwmon/hwmon*/*).
>
> (Linux kernel): The gpu_metrics file contains different data, depending on 
> what version is used. All valid versions can be found in the source code: 
> "https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/include/kgd_pp_interface.h#L725";.
>  For my CPU/APU the 'gpu_metrics_v2_1' structure is used (I tested this by 
> reading the gpu_metrics file myself). Furthermore, I think that for my case, 
> this structure is set by the function 
> "https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c#L459";
>  but I am not completely sure about this.

The metrics provided by the SMU firmware vary from asic to asic.
For things that are not supported by the metrics table for a
particular asic, those fields would be 0.  You can see what metrics
are supported for your asic in smu_v13_0_5_get_gpu_metrics() as that
function populates the supported fields from the firmware to the
common structure.  current_gfxclk is not supported in your asic, but
average_gfxclk_frequency is.  So you'd want to use whichever field is
available for a particular asic in Mangohud.
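For illustration, a minimal sketch of how a tool like Mangohud could inspect
the metrics header before deciding which fields to trust (it assumes the
common metrics_table_header layout from kgd_pp_interface.h; the fallback
logic is illustrative, not Mangohud's actual code):

/* Sketch: read gpu_metrics, check the header, then prefer
 * average_gfxclk_frequency when current_gfxclk is not populated. */
#include <stdint.h>
#include <stdio.h>

struct metrics_table_header {
	uint16_t structure_size;
	uint8_t  format_revision;	/* 2 => APU gpu_metrics_v2_x */
	uint8_t  content_revision;
};

static int read_gpu_metrics(const char *path, void *buf, size_t len)
{
	FILE *f = fopen(path, "rb");
	size_t n;

	if (!f)
		return -1;
	n = fread(buf, 1, len, f);
	fclose(f);
	return n >= sizeof(struct metrics_table_header) ? (int)n : -1;
}

With format_revision 2 and content_revision 1 the buffer can then be parsed
as gpu_metrics_v2_1, and a zero current_gfxclk can fall back to
average_gfxclk_frequency as suggested above.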

>
> Lastly, I am not familiar with umr. I assume that you are referring to 
> "https://gitlab.freedesktop.org/tomstdenis/umr";? If I find some time this 
> weekend, then I will look into this some more.

Yes, that is the right link.  umr uses the same interface as mangohud,
so you should see the same data.

Alex


>
> Kind regards,
> sfrcorne
>
> --- Original Message ---
> On Monday, February 6th, 2023 at 22:22, Alex Deucher  
> wrote:
>
> > On Mon, Feb 6, 2023 at 9:22 AM sfrcorne sfrco...@protonmail.com wrote:
> >
> > > Hello,
> > >
> > > I hope this is the correct place to ask my question. I was not sure if I 
> > > should have opened a new issue on Gitlab or sent an email here, since I 
> > > don't know whether this is a bug or intended behaviour.
> > >
> > > The question is about the new AMD Ryzen 7000 CPU's. These new CPU's have 
> > > an iGPU and consequently provide a gpu_metrics file for monitoring the 
> > > GPU/CPU (APU?). This file is used by programs like Mangohud, that try to 
> > > read (among other values) the following 4 values:
> > > - current_gfxclk
> > > - current_uclk
> > > - average_cpu_power
> > > - temperature_core
> > > However it appears that on AMD Ryzen 7000 CPU's these 4 values are not 
> > > provided/updated in the gpu_metrics file. Other values like 
> > > 'average_core_power', 'temperature_l3' and the other 'current_clk' are 
> > > also not provided/updated but these are not used by Mangohud at the 
> > > moment.
> > >
> > > Is this intentional or a bug? And will this be fixed and/or will support 
> > > for these 4 values be added in the future?
> >
> >
> > What specific CPU/APU is this? I don't recall off hand how mangohud
> > queries this stuff, but you can take a look at the hwmon interfaces
> > exposed by the driver or if you want the whole metrics table, you can
> > use umr to fetch and decode it via the kernel interface. That will
> > allow you to verify that the firmware is producing the proper data.
> >
> > Alex


[PATCH] drm/amd/display: Align num_crtc to max_streams

2023-02-06 Thread Tianci Yin
From: tiancyin 

[Why]
Display pipes might be harvested on some SKUs, which causes
adev->mode_info.num_crtc to mismatch the usable crtc number,
so the error dmesgs below are observed after GPU recovery.

  *ERROR* amdgpu_dm_set_crtc_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_crtc_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_crtc_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_pflip_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_pflip_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_pflip_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_pflip_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_vupdate_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_vupdate_irq_state: crtc is NULL at id :3
  *ERROR* amdgpu_dm_set_vupdate_irq_state: crtc is NULL at id :3

[How]
max_streams is already limited according to the pipe fuse, so align
num_crtc to max_streams to eliminate the error logs.

Signed-off-by: tiancyin 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b31cfda30ff9..87ec2574cc09 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4285,6 +4285,9 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
break;
}
 
+   /* Adjust the crtc number according to the DCN pipe fuse. */
+   adev->mode_info.num_crtc = dm->dc->caps.max_streams;
+
for (i = 0; i < dm->dc->caps.max_streams; i++)
if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) {
DRM_ERROR("KMS: Failed to initialize crtc\n");
-- 
2.34.1



RE: [PATCH] drm/amd/pm/smu7: move variables to where they are used

2023-02-06 Thread Quan, Evan
[AMD Official Use Only - General]

Reviewed-by: Evan Quan 

> -Original Message-
> From: Deucher, Alexander 
> Sent: Tuesday, February 7, 2023 1:05 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Quan, Evan
> 
> Subject: [PATCH] drm/amd/pm/smu7: move variables to where they are
> used
> 
> Move variable declarations to where they are used.  Fixes
> a segfault on smu7 V0 structures where some tables don't
> exist.
> 
> Cc: Evan Quan 
> Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2388
> Fixes: 711d3c39503b ("drm/amd/pm: fulfill powerplay peak profiling mode
> shader/memory clock settings")
> Signed-off-by: Alex Deucher 
> ---
>  .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c| 14 --
> 
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> index 89fc32318d80..e10cc5e7928e 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> @@ -1504,12 +1504,6 @@ static void
> smu7_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
>  {
>   struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr-
> >backend);
>   struct smu7_dpm_table *golden_dpm_table = &data-
> >golden_dpm_table;
> - struct phm_clock_voltage_dependency_table
> *vddc_dependency_on_sclk =
> - hwmgr->dyn_state.vddc_dependency_on_sclk;
> - struct phm_ppt_v1_information *table_info =
> - (struct phm_ppt_v1_information *)(hwmgr-
> >pptable);
> - struct phm_ppt_v1_clock_voltage_dependency_table
> *vdd_dep_on_sclk =
> - table_info->vdd_dep_on_sclk;
>   int32_t tmp_sclk, count, percentage;
> 
>   if (golden_dpm_table->mclk_table.count == 1) {
> @@ -1524,6 +1518,9 @@ static void
> smu7_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
>   tmp_sclk = hwmgr->pstate_mclk * percentage / 100;
> 
>   if (hwmgr->pp_table_version == PP_TABLE_V0) {
> + struct phm_clock_voltage_dependency_table
> *vddc_dependency_on_sclk =
> + hwmgr->dyn_state.vddc_dependency_on_sclk;
> +
>   for (count = vddc_dependency_on_sclk->count - 1; count >=
> 0; count--) {
>   if (tmp_sclk >= vddc_dependency_on_sclk-
> >entries[count].clk) {
>   hwmgr->pstate_sclk =
> vddc_dependency_on_sclk->entries[count].clk;
> @@ -1536,6 +1533,11 @@ static void
> smu7_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
>   hwmgr->pstate_sclk_peak =
>   vddc_dependency_on_sclk-
> >entries[vddc_dependency_on_sclk->count - 1].clk;
>   } else if (hwmgr->pp_table_version == PP_TABLE_V1) {
> + struct phm_ppt_v1_information *table_info =
> + (struct phm_ppt_v1_information *)(hwmgr-
> >pptable);
> + struct phm_ppt_v1_clock_voltage_dependency_table
> *vdd_dep_on_sclk =
> + table_info->vdd_dep_on_sclk;
> +
>   for (count = vdd_dep_on_sclk->count - 1; count >= 0; count--)
> {
>   if (tmp_sclk >= vdd_dep_on_sclk->entries[count].clk)
> {
>   hwmgr->pstate_sclk = vdd_dep_on_sclk-
> >entries[count].clk;
> --
> 2.39.1


RE: [PATCH 1/2] drm/amd/pm: bump SMU 13.0.0 driver_if header version

2023-02-06 Thread Chen, Guchun
Acked-by: Guchun Chen 

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Evan Quan
Sent: Tuesday, February 7, 2023 10:49 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Quan, Evan 

Subject: [PATCH 1/2] drm/amd/pm: bump SMU 13.0.0 driver_if header version

This can suppress the warning caused by version mismatch.

Signed-off-by: Evan Quan 
Change-Id: I5e62de359015ac93b2dfd6a257584a5e6d38e1f8
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h   | 5 +++--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index d6b964cf73bd..4bc7aee4d44f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -123,7 +123,8 @@
 			(1 << FEATURE_DS_FCLK_BIT) | \
 			(1 << FEATURE_DS_LCLK_BIT) | \
 			(1 << FEATURE_DS_DCFCLK_BIT) | \
-			(1 << FEATURE_DS_UCLK_BIT))
+			(1 << FEATURE_DS_UCLK_BIT) | \
+			(1ULL << FEATURE_DS_VCN_BIT))
 
 //For use with feature control messages
 typedef enum {
@@ -522,9 +523,9 @@ typedef enum  {
   TEMP_HOTSPOT_M,
   TEMP_MEM,
   TEMP_VR_GFX,
-  TEMP_VR_SOC,
   TEMP_VR_MEM0,
   TEMP_VR_MEM1,
+  TEMP_VR_SOC,
   TEMP_VR_U,
   TEMP_LIQUID0,
   TEMP_LIQUID1,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 913d3a8d7e2f..40adc4fa808a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,7 +28,7 @@
 #define SMU13_DRIVER_IF_VERSION_INV 0x
 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
--
2.34.1



Re: [PATCH 2/2] drm/amd/pm: bump SMU 13.0.7 driver_if header version

2023-02-06 Thread Alex Deucher
Series is:
Acked-by: Alex Deucher 

On Mon, Feb 6, 2023 at 9:49 PM Evan Quan  wrote:
>
> This can suppress the warning caused by version mismatch.
>
> Signed-off-by: Evan Quan 
> Change-Id: Id3331a329ea9b1dbc45d8a4e773af1cbe8e21a27
> ---
>  .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 3 ++-
>  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   | 2 +-
>  2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git 
> a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
> index d6b13933a98f..3446a569057a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
> @@ -126,7 +126,8 @@
>   (1 << FEATURE_DS_FCLK_BIT) | \
>   (1 << FEATURE_DS_LCLK_BIT) | \
>   (1 << FEATURE_DS_DCFCLK_BIT) | \
> - (1 << FEATURE_DS_UCLK_BIT)
> + (1 << FEATURE_DS_UCLK_BIT) | \
> + (1ULL << FEATURE_DS_VCN_BIT)
>
>  //For use with feature control messages
>  typedef enum {
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
> b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
> index 40adc4fa808a..1c0ae2cb757b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
> @@ -32,7 +32,7 @@
>  #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
>  #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
>  #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
> -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35
> +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37
>  #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D
>
>  #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
> --
> 2.34.1
>


Re: gpu_metrics does not provide 'current_gfxclk', 'current_uclk', 'average_cpu_power' & 'temperature_core' on AMD Ryzen 7000 CPU

2023-02-06 Thread sfrcorne
Dear Alex,

First of all, thank you for your response. Personally, I use a Ryzen 5 7600X 
however people with a Ryzen 9 7900X are also reporting this issue. The relevant 
bug report in Mangohud can be found here: 
"https://github.com/flightlessmango/MangoHud/issues/868";.

I looked around a bit in both the Mangohud source code and the Linux kernel 
source code.

(Mangohud source): From what I understand, Mangohud looks for a file 
"/sys/class/drm/card*/device/gpu_metrics". If this file exists (and it does 
exists on my machine), it tries to read this file and extract the relevant GPU 
data (and in case of an APU also the CPU data) from it (these are the values I 
was talking about in my previous mail). When the file 
"/sys/class/drm/card*/device/gpu_metrics" exists, it will not use the data 
provided by hwmon (/sys/class/hwmon/hwmon*/*).

(Linux kernel): The gpu_metrics file contains different data, depending on what 
version is used. All valid versions can be found in the source code: 
"https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/include/kgd_pp_interface.h#L725";.
 For my CPU/APU the 'gpu_metrics_v2_1' structure is used (I tested this by 
reading the gpu_metrics file myself). Furthermore, I think that for my case, 
this structure is set by the function 
"https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c#L459";
 but I am not completely sure about this.

Lastly, I am not familiar with umr. I assume that you are referring to 
"https://gitlab.freedesktop.org/tomstdenis/umr";? If I find some time this 
weekend, then I will look into this some more.

Kind regards,
sfrcorne

--- Original Message ---
On Monday, February 6th, 2023 at 22:22, Alex Deucher  
wrote:

> On Mon, Feb 6, 2023 at 9:22 AM sfrcorne sfrco...@protonmail.com wrote:
> 
> > Hello,
> > 
> > I hope this is the correct place to ask my question. I was not sure if I 
> > should have opened a new issue on Gitlab or sent an email here, since I 
> > don't know whether this is a bug or intended behaviour.
> > 
> > The question is about the new AMD Ryzen 7000 CPU's. These new CPU's have an 
> > iGPU and consequently provide a gpu_metrics file for monitoring the GPU/CPU 
> > (APU?). This file is used by programs like Mangohud, that try to read 
> > (among other values) the following 4 values:
> > - current_gfxclk
> > - current_uclk
> > - average_cpu_power
> > - temperature_core
> > However it appears that on AMD Ryzen 7000 CPU's these 4 values are not 
> > provided/updated in the gpu_metrics file. Other values like 
> > 'average_core_power', 'temperature_l3' and the other 'current_clk' are 
> > also not provided/updated but these are not used by Mangohud at the moment.
> > 
> > Is this intentional or a bug? And will this be fixed and/or will support for 
> > these 4 values be added in the future?
> 
> 
> What specific CPU/APU is this? I don't recall off hand how mangohud
> queries this stuff, but you can take a look at the hwmon interfaces
> exposed by the driver or if you want the whole metrics table, you can
> use umr to fetch and decode it via the kernel interface. That will
> allow you to verify that the firmware is producing the proper data.
> 
> Alex


[PATCH 1/2] drm/amd/pm: bump SMU 13.0.0 driver_if header version

2023-02-06 Thread Evan Quan
This can suppress the warning caused by version mismatch.

Signed-off-by: Evan Quan 
Change-Id: I5e62de359015ac93b2dfd6a257584a5e6d38e1f8
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h   | 5 +++--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index d6b964cf73bd..4bc7aee4d44f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -123,7 +123,8 @@
 			(1 << FEATURE_DS_FCLK_BIT) | \
 			(1 << FEATURE_DS_LCLK_BIT) | \
 			(1 << FEATURE_DS_DCFCLK_BIT) | \
-			(1 << FEATURE_DS_UCLK_BIT))
+			(1 << FEATURE_DS_UCLK_BIT) | \
+			(1ULL << FEATURE_DS_VCN_BIT))
 
 //For use with feature control messages
 typedef enum {
@@ -522,9 +523,9 @@ typedef enum  {
   TEMP_HOTSPOT_M,
   TEMP_MEM,
   TEMP_VR_GFX,
-  TEMP_VR_SOC,
   TEMP_VR_MEM0,
   TEMP_VR_MEM1,
+  TEMP_VR_SOC,
   TEMP_VR_U,
   TEMP_LIQUID0,
   TEMP_LIQUID1,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 913d3a8d7e2f..40adc4fa808a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,7 +28,7 @@
 #define SMU13_DRIVER_IF_VERSION_INV 0x
 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
-- 
2.34.1



[PATCH 2/2] drm/amd/pm: bump SMU 13.0.7 driver_if header version

2023-02-06 Thread Evan Quan
This can suppress the warning caused by version mismatch.

Signed-off-by: Evan Quan 
Change-Id: Id3331a329ea9b1dbc45d8a4e773af1cbe8e21a27
---
 .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index d6b13933a98f..3446a569057a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -126,7 +126,8 @@
  (1 << FEATURE_DS_FCLK_BIT) | \
  (1 << FEATURE_DS_LCLK_BIT) | \
  (1 << FEATURE_DS_DCFCLK_BIT) | \
- (1 << FEATURE_DS_UCLK_BIT)
+ (1 << FEATURE_DS_UCLK_BIT) | \
+ (1ULL << FEATURE_DS_VCN_BIT)
 
 //For use with feature control messages
 typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 40adc4fa808a..1c0ae2cb757b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -32,7 +32,7 @@
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
-- 
2.34.1



RE: [PATCH] drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping

2023-02-06 Thread Xu, Feifei
[AMD Official Use Only - General]



Reviewed-by: Feifei Xu 

-Original Message-
From: amd-gfx  On Behalf Of Evan Quan
Sent: Friday, February 3, 2023 5:39 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Quan, Evan 

Subject: [PATCH] drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping

Add missing GetPptLimit message mapping.

Signed-off-by: Evan Quan 
Change-Id: Ic4edfa3153988721a6ee66dd69a1d4ca8a5ea45c
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 02ee248899c0..6a882c4f7cee 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -124,6 +124,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(DFCstateControl,	PPSMC_MSG_SetExternalClientDfCstateAllow,	0),
 	MSG_MAP(ArmD3,			PPSMC_MSG_ArmD3,				0),
 	MSG_MAP(AllowGpo,		PPSMC_MSG_SetGpoAllow,				0),
+	MSG_MAP(GetPptLimit,		PPSMC_MSG_GetPptLimit,				0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
-- 
2.34.1


Indexing of FeatureCtrlMask for SMU13 OverDrive

2023-02-06 Thread Matt Coffin
Hello again,

I've been working on OverDrive support for smu13, as you probably
already know. In that endeavor, it also contains the following:

1. I've come up with a few patterns that I think will reduce the
amount of boilerplate and SMU-specific code required to
implement these interfaces in the future.
2. Since the old pp_od_clk_voltage sysfs interface is inadequate for
usage in setting values other than a few indexed clock/voltage settings,
I'll likely be sending a proposed "generic" interface, where OD settings
are exposed to userspace by ASIC-specific indexed identifiers.

But, those are beside the point, for now.

While picking through the existing headers, the information in
smu_v13_0_0_pptable.h seems to not quite be in line with what I'm seeing
coming from the card, so I'm instead focusing mainly on
smu13_driver_if_v13_0_0.h.

In the two OverDrive-related structs, OverDriveTable_t and
OverDriveLimits_t, the FeatureCtrlMask member seems to be controlling
which of the "features" of OverDrive would actually be in use. As of
yet, I haven't been able to find an index of what the bits in here
actually mean. Is there any way you could help me out with that?

My best guess thus far is that they are by each element of the
OverDriveTable_t struct, but that's only just a guess.
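If that guess holds, decoding could look like the sketch below; the bit
assignments here are purely hypothetical placeholders, not taken from any AMD
header:

/* Hypothetical sketch only: assumes FeatureCtrlMask bit N gates the N-th
 * tunable in OverDriveTable_t.  This enum is a placeholder, NOT the real
 * firmware definition. */
#include <stdbool.h>
#include <stdint.h>

enum od_feature_bit_guess {
	OD_GUESS_VOLTAGE_OFFSET = 0,
	OD_GUESS_VDDGFX_VMAX,
	OD_GUESS_IDLE_PWR_SAVING,
	OD_GUESS_RUNTIME_PWR_SAVING,
	OD_GUESS_GFXCLK_FMIN,
	OD_GUESS_GFXCLK_FMAX,
	OD_GUESS_UCLK_FMIN,
	/* ... */
};

static bool od_feature_enabled(uint32_t feature_ctrl_mask, unsigned int bit)
{
	return feature_ctrl_mask & (1u << bit);
}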

For reference, here are the values I'm seeing present in each at boot
time.

Since FeatureCtrlMask is 0b1001101, the current theory is that the
"unsupported" features would be VddGfxVmax, GfxclkFmin, GfxclkFmax. Does
that line up with what we'd be expecting for this ASIC?

Thanks in advance for any information you can provide. I really
appreciate the work that you all do.

Thanks,
Matt

OverDriveLimits:
FeatureCtrlMask: [0x07cd, 0x07cd]
VoltageOffsetPerZoneBoundary: [-450, 0]
VddGfxVmax: [0, 0]
IdlePwrSavingFeaturesCtrl: [0x00, 0x00]
RuntimePwrSavingFeaturesCtrl: [0x00, 0x00]
GfxclkFmin: [500, 5000]
GfxclkFmax: [500, 5000]
UclkFmin: [97, 1500]
UclkFmax: [97, 1500]
Ppt: [-10, 15], Tdc: [-10, 0]
FanLinearPwmPoints: [23, 100]
FanLinearTempPoints: [25, 100]
FanMinimumPwm: [23, 100]
AcousticTargetRpmThreshold: [500, 3200]
AcousticLimitRpmThreshold: [500, 3200]
FanTargetTemperature: [25, 105]
FanZeroRpmEnable: [0, 1]
FanZeroRpmStopTemp: [25, 100]
FanMode: [0, 1]
MaxOpTemp: [50, 110]
OverDriveTable:
FeatureCtrlMask: 0x
VoltageOffsetPerZoneBoundary[0]: 0
VoltageOffsetPerZoneBoundary[1]: 0
VoltageOffsetPerZoneBoundary[2]: 0
VoltageOffsetPerZoneBoundary[3]: 0
VoltageOffsetPerZoneBoundary[4]: 0
VoltageOffsetPerZoneBoundary[5]: 0
VddGfxVmax: 1150
IdlePwrSavingFeaturesCtrl: 0x00
RuntimePwrSavingFeaturesCtrl: 0x00
GfxclkFmin: 500
GfxclkFmax: 2890
UclkFmin: 97
UclkFmax: 1249
Ppt: 0
Tdc: 0
FanLinearPwmPoints[0]: 0
FanLinearPwmPoints[1]: 0
FanLinearPwmPoints[2]: 0
FanLinearPwmPoints[3]: 0
FanLinearPwmPoints[4]: 0
FanLinearPwmPoints[5]: 0
FanLinearTempPoints[0]: 0
FanLinearTempPoints[1]: 0
FanLinearTempPoints[2]: 0
FanLinearTempPoints[3]: 0
FanLinearTempPoints[4]: 0
FanLinearTempPoints[5]: 0
FanMinimumPwm: 35
AcousticTargetRpmThreshold: 1250
AcousticLimitRpmThreshold: 1500
FanTargetTemperature: 94
FanZeroRpmEnable: 1
FanZeroRpmStopTemp: 55
FanMode: 0
MaxOpTemp: 110


Re: [PATCH] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes

2023-02-06 Thread Friedrich Vock

Hi,

thanks for applying the patch!

Do you think it'd also be possible to backport it to previous kernel
versions or do you already plan to do that?
Since it is a one-liner bugfix it shouldn't be too hard to backport.

Thank you,
Friedrich Vock

On 06.02.23 21:26, Alex Deucher wrote:

Applied.  Thanks!

Alex

On Mon, Feb 6, 2023 at 3:35 AM Christian König  wrote:



Am 02.02.23 um 17:21 schrieb Friedrich Vock:

The pid field corresponds to the result of gettid() in userspace.
However, userspace cannot reliably attribute PTE events to processes
with just the thread id. This patch allows userspace to easily
attribute PTE update events to specific processes by comparing this
field with the result of getpid().

For attributing events to specific threads, the thread id is also
contained in the common fields of each trace event.

Signed-off-by: Friedrich Vock 
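As an aside, the userspace side of the comparison described above is trivial;
a sketch (not part of the patch):

/* Illustration only: match the tracepoint's pid field against this process. */
#include <unistd.h>

static int event_is_from_this_process(int trace_event_pid)
{
	/* With the fix the event carries the tgid, i.e. what getpid() returns,
	 * regardless of which thread submitted the update. */
	return trace_event_pid == getpid();
}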

Ah, yes that makes more sense. Reviewed-by: Christian König


Alex do you pick this up or should I take care of it?

Thanks,
Christian.


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..01e42bdd8e4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params 
*params,
   trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
   min(nptes, 32u), dst, incr,
   upd_flags,
- vm->task_info.pid,
+ vm->task_info.tgid,
   vm->immediate.fence_context);
   amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
  cursor.level, pe_start, dst,
--
2.39.1



Re: gpu_metrics does not provide 'current_gfxclk', 'current_uclk', 'average_cpu_power' & 'temperature_core' on AMD Ryzen 7000 CPU

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 9:22 AM sfrcorne  wrote:
>
> Hello,
>
> I hope this is the correct place to ask my question. I was not sure if I 
> should have opened a new issue on Gitlab or sent an email here, since I don't 
> know whether this is a bug or intended behaviour.
>
> The question is about the new AMD Ryzen 7000 CPU's. These new CPU's have an 
> iGPU and consequently provide a gpu_metrics file for monitoring the GPU/CPU 
> (APU?). This file is used by programs like Mangohud, that try to read (among 
> other values) the following 4 values:
>  - current_gfxclk
>  - current_uclk
>  - average_cpu_power
>  - temperature_core
> However it appears that on AMD Ryzen 7000 CPU's these 4 values are not 
> provided/updated in the gpu_metrics file. Other values like 
> 'average_core_power', 'temperature_l3' and the other 'current_clk' are 
> also not provided/updated but these are not used by Mangohud at the moment.
>
> Is this intentional or a bug? And will this be fixed and/or will support for 
> these 4 values be added in the future?

What specific CPU/APU is this?  I don't recall off hand how mangohud
queries this stuff, but you can take a look at the hwmon interfaces
exposed by the driver or if you want the whole metrics table, you can
use umr to fetch and decode it via the kernel interface.  That will
allow you to verify that the firmware is producing the proper data.

Alex


Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 12:01 PM Christian König
 wrote:
>
> Am 06.02.23 um 17:56 schrieb Alex Deucher:
> > On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  
> > wrote:
> >> Hey Alex,
> >>
> >> On 03/02/2023 23:07, Alex Deucher wrote:
> >>> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  
> >>> wrote:
>  From: Alex Deucher 
> 
>  This patch introduces a new UAPI/IOCTL for usermode graphics
>  queue. The userspace app will fill this structure and request
>  the graphics driver to add a graphics work queue for it. The
>  output of this UAPI is a queue id.
> 
>  This UAPI maps the queue into GPU, so the graphics app can start
>  submitting work to the queue as soon as the call returns.
> 
>  Cc: Alex Deucher 
>  Cc: Christian Koenig 
>  Signed-off-by: Alex Deucher 
>  Signed-off-by: Shashank Sharma 
>  ---
> include/uapi/drm/amdgpu_drm.h | 53 +++
> 1 file changed, 53 insertions(+)
> 
>  diff --git a/include/uapi/drm/amdgpu_drm.h 
>  b/include/uapi/drm/amdgpu_drm.h
>  index 4038abe8505a..6c5235d107b3 100644
>  --- a/include/uapi/drm/amdgpu_drm.h
>  +++ b/include/uapi/drm/amdgpu_drm.h
>  @@ -54,6 +54,7 @@ extern "C" {
> #define DRM_AMDGPU_VM  0x13
> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> #define DRM_AMDGPU_SCHED   0x15
>  +#define DRM_AMDGPU_USERQ   0x16
> 
> #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>  @@ -71,6 +72,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_VM, union drm_amdgpu_vm)
> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>  +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
>  DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> 
> /**
>  * DOC: memory domains
>  @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
>    union drm_amdgpu_ctx_out out;
> };
> 
>  +/* user queue IOCTL */
>  +#define AMDGPU_USERQ_OP_CREATE 1
>  +#define AMDGPU_USERQ_OP_FREE   2
>  +
>  +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
>  +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
>  +
>  +struct drm_amdgpu_userq_mqd {
>  +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>  +   __u32   flags;
>  +   /** IP type: AMDGPU_HW_IP_* */
>  +   __u32   ip_type;
>  +   /** GEM object handle */
>  +   __u32   doorbell_handle;
>  +   /** Doorbell offset in dwords */
>  +   __u32   doorbell_offset;
> >>> Since doorbells are 64 bit, maybe this offset should be in qwords.
> >> Can you please help to cross-check this information? All the existing
> >> kernel doorbell calculations keep the doorbell size as sizeof(u32).
> > Doorbells on pre-vega hardware are 32 bits so that is where that comes
> > from, but from vega onward most doorbells are 64 bit.  I think some
> > versions of VCN may still use 32 bit doorbells.  Internally in the
> > kernel driver we just use two slots for newer hardware, but for the
> > UAPI, I think we can just stick with 64 bit slots to avoid confusion.
> > Even if an engine only uses a 32 bit one, I don't know that there is
> > much value to trying to support variable doorbell sizes.
>
> I think we can stick with using __u32 because this is *not* the size of
> the doorbell entries.
>
> Instead this is the offset into the BO where to find the doorbell for
> this queue (which then in turn is 64bits wide).
>
> Since we will probably never have more than 4GiB of doorbells we should be
> pretty safe to use 32 bits here.

Yes, the offset would still be 32 bits, but the units would be qwords.  E.g.,

+   /** Doorbell offset in qwords */
+   __u32   doorbell_offset;

That way you couldn't accidentally specify an overlapping doorbell.
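To make the overlap argument concrete, a small sketch (illustrative only; the
CPU mapping of the doorbell BO is an assumption, not part of the proposed
UAPI):

/* With the offset expressed in qwords, two distinct offsets can never name
 * overlapping 64-bit doorbell slots. */
#include <stdint.h>

static volatile uint64_t *queue_doorbell(void *doorbell_bo_cpu_map,
					 uint32_t doorbell_offset_qwords)
{
	volatile uint64_t *slots = doorbell_bo_cpu_map;

	return &slots[doorbell_offset_qwords];	/* one 64-bit slot per queue */
}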

Alex

>
> Christian.
>
> >
> > Alex
> >
>  +   /** GPU virtual address of the queue */
>  +   __u64   queue_va;
>  +   /** Size of the queue in bytes */
>  +   __u64   queue_size;
>  +   /** GPU virtual address of the rptr */
>  +   __u64   rptr_va;
>  +   /** GPU virtual address of the wptr */
>  +   __u64   wptr_va;
>  +};
>  +
>  +struct drm_amdgpu_userq_in {
>  +   /** AMDGPU_USERQ_OP_* */
>  +   __u32   op;
>  +   /** Flags */
>  +   __u32   flags;
>  +   /** Queue handle to associate the queue free

Re: [PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x

2023-02-06 Thread Felix Kuehling

On 2023-02-06 07:58, Ji, Ruili wrote:

From: Ruili Ji 

For the MQD memory, KMD always allocates 4K of memory,
and the MES scheduler writes to the end of the MQD for the unmap flag.

Signed-off-by: Ruili Ji 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 12 +--
  2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c06ada0844ba..d682e6921438 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2244,10 +2244,22 @@ static int allocate_hiq_sdma_mqd(struct 
device_queue_manager *dqm)
int retval;
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
-   uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
-   get_num_all_sdma_engines(dqm) *
-   dev->device_info.num_sdma_queues_per_engine +
-   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   uint32_t size;
+   /*
+* MES writes to areas beyond the MQD size. So allocate
+* 1 PAGE_SIZE of memory for the MQD if MES is enabled.
+*/
+   if (dev->shared_resources.enable_mes) {
+   size = PAGE_SIZE *
+   get_num_all_sdma_engines(dqm) *
+   dev->device_info.num_sdma_queues_per_engine +
+   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   } else {
+   size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
+   get_num_all_sdma_engines(dqm) *
+   dev->device_info.num_sdma_queues_per_engine +
+   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   }


This function is needed mostly as a workaround for an Arcturus firmware 
limitation: it doesn't have enough SRAM to store 64-bit pointers to 
all SDMA MQDs. When using MES, you can probably just use the generic 
allocate_mqd/kfd_free_mqd_cp functions for SDMA MQDs. And you don't need 
an HIQ MQD at all, as far as I know, so you could skip 
allocate_hiq_sdma_mqd completely if MES is enabled.


Regards,
  Felix


  
  	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,

&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 623ccd227b7d..ea176a515898 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -66,15 +66,23 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
  {
struct kfd_mem_obj *mqd_mem_obj = NULL;
uint64_t offset;
+   uint32_t size;
  
  	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);

if (!mqd_mem_obj)
return NULL;
+   /*
+* MES writes to areas beyond the MQD size. So allocate
+* 1 PAGE_SIZE of memory for the MQD if MES is enabled.
+*/
+   if (dev->shared_resources.enable_mes)
+   size = PAGE_SIZE;
+   else
+   size = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
  
  	offset = (q->sdma_engine_id *

dev->device_info.num_sdma_queues_per_engine +
-   q->sdma_queue_id) *
-   dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+   q->sdma_queue_id) * size;
  
  	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
  


Re: [PATCH] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes

2023-02-06 Thread Alex Deucher
Applied.  Thanks!

Alex

On Mon, Feb 6, 2023 at 3:35 AM Christian König  wrote:
>
>
>
> Am 02.02.23 um 17:21 schrieb Friedrich Vock:
> > The pid field corresponds to the result of gettid() in userspace.
> > However, userspace cannot reliably attribute PTE events to processes
> > with just the thread id. This patch allows userspace to easily
> > attribute PTE update events to specific processes by comparing this
> > field with the result of getpid().
> >
> > For attributing events to specific threads, the thread id is also
> > contained in the common fields of each trace event.
> >
> > Signed-off-by: Friedrich Vock 
>
> Ah, yes that makes more sense. Reviewed-by: Christian König
> 
>
> Alex do you pick this up or should I take care of it?
>
> Thanks,
> Christian.
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > index b5f3bba851db..01e42bdd8e4e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > @@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct 
> > amdgpu_vm_update_params *params,
> >   trace_amdgpu_vm_update_ptes(params, frag_start, 
> > upd_end,
> >   min(nptes, 32u), dst, 
> > incr,
> >   upd_flags,
> > - vm->task_info.pid,
> > + vm->task_info.tgid,
> >   
> > vm->immediate.fence_context);
> >   amdgpu_vm_pte_update_flags(params, 
> > to_amdgpu_bo_vm(pt),
> >  cursor.level, pe_start, 
> > dst,
> > --
> > 2.39.1
> >
>


Re: [PATCH] [SUBMITTED 20210927] [RESEND^2] drm/amdgpu: fix enum odm_combine_mode mismatch

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 2:36 PM Arnd Bergmann  wrote:
>
> From: Arnd Bergmann 
>
> A conversion from 'bool' to 'enum odm_combine_mode' was incomplete,
> and gcc warns about this with many instances of
>
> display/dc/dml/dcn20/display_mode_vba_20.c:3899:44: warning: implicit 
> conversion from 'enum ' to 'enum
> odm_combine_mode' [-Wenum-conversion]
>  3899 | locals->ODMCombineEnablePerState[i][k] = false;
>
> Change the ones that we get a warning for, using the same numerical
> values to leave the behavior unchanged.
>
> Fixes: 5fc11598166d ("drm/amd/display: expand dml structs")
> Link: https://lore.kernel.org/all/20201026210039.3884312-3-a...@kernel.org/
> Link: https://lore.kernel.org/all/20210927100659.1431744-1-a...@kernel.org/
> Signed-off-by: Arnd Bergmann 
> ---
> I sent this in 2020 and in 2021, but never got a reply and the warning
> is still there.

Applied.  Sorry for the delay.

Alex

> ---
>  .../amd/display/dc/dml/dcn20/display_mode_vba_20.c   |  8 
>  .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 10 +-
>  .../amd/display/dc/dml/dcn21/display_mode_vba_21.c   | 12 ++--
>  3 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
> index f34bc3c8da41..69c41e3e3ba2 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
> @@ -3901,14 +3901,14 @@ void 
> dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
> 
> mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = 
> mode_lib->vba.PixelClock[k] / 2
> * (1 + 
> mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
>
> -   locals->ODMCombineEnablePerState[i][k] = 
> false;
> +   locals->ODMCombineEnablePerState[i][k] = 
> dm_odm_combine_mode_disabled;
> mode_lib->vba.PlaneRequiredDISPCLK = 
> mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
> if (mode_lib->vba.ODMCapability) {
> if 
> (locals->PlaneRequiredDISPCLKWithoutODMCombine > 
> mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
> -   
> locals->ODMCombineEnablePerState[i][k] = true;
> +   
> locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
> 
> mode_lib->vba.PlaneRequiredDISPCLK = 
> mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
> } else if (locals->HActive[k] > 
> DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
> -   
> locals->ODMCombineEnablePerState[i][k] = true;
> +   
> locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
> 
> mode_lib->vba.PlaneRequiredDISPCLK = 
> mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
> }
> }
> @@ -3961,7 +3961,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
> display_mode_lib *mode_l
> locals->RequiredDISPCLK[i][j] = 0.0;
> locals->DISPCLK_DPPCLK_Support[i][j] = true;
> for (k = 0; k <= 
> mode_lib->vba.NumberOfActivePlanes - 1; k++) {
> -   
> locals->ODMCombineEnablePerState[i][k] = false;
> +   
> locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
> if (locals->SwathWidthYSingleDPP[k] 
> <= locals->MaximumSwathWidth[k]) {
> locals->NoOfDPP[i][j][k] = 1;
> 
> locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
> index 366138df0fe2..f475a0ae946c 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
> @@ -4012,17 +4012,17 @@ void 
> dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
> 
> mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = 
> mode_lib->vba.PixelClock[k] / 2
> * (1 + 
> mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
>
> - 

Re: [PATCH] drm/amd/amdgpu: add complete header search path

2023-02-06 Thread Alex Deucher
Applied.  Thanks!

On Fri, Feb 3, 2023 at 10:27 PM Randy Dunlap  wrote:
>
> The path for the "mod_info_packet.h" header file is
> incomplete, so add its location to the header search path
> in the amdgpu Makefile.
>
> Seen on ARCH=alpha (275 times in one build).
>
> In file included from ../drivers/gpu/drm/amd/amdgpu/amdgpu.h:90,
>  from ../drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:43:
> ../drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.h:62:10: fatal 
> error: mod_info_packet.h: No such file or directory
>62 | #include "mod_info_packet.h"
>   |  ^~~
> compilation terminated.
>
> Fixes: 5b49da02ddbe ("drm/amd/display: Enable Freesync over PCon")
> Signed-off-by: Randy Dunlap 
> Cc: Signed-off-by: Sung Joon Kim 
> Cc: Alex Deucher 
> Cc: Christian König 
> Cc: "Pan, Xinhui" 
> Cc: amd-gfx@lists.freedesktop.org
> Cc: dri-de...@lists.freedesktop.org
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile |1 +
>  1 file changed, 1 insertion(+)
>
> diff -- a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -34,6 +34,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/
> -I$(FULL_AMD_PATH)/acp/include \
> -I$(FULL_AMD_DISPLAY_PATH) \
> -I$(FULL_AMD_DISPLAY_PATH)/include \
> +   -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
> -I$(FULL_AMD_DISPLAY_PATH)/dc \
> -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
> -I$(FULL_AMD_PATH)/amdkfd


RE: [PATCH 1/2] drm/amdgpu: Fix incorrect filenames in sysfs comments

2023-02-06 Thread Kasiviswanathan, Harish
[AMD Official Use Only - General]

This series Reviewed-by: Harish Kasiviswanathan 

-Original Message-
From: amd-gfx  On Behalf Of 
kent.russ...@amd.com
Sent: Monday, February 6, 2023 12:26 PM
To: amd-gfx@lists.freedesktop.org
Cc: Russell, Kent 
Subject: [PATCH 1/2] drm/amdgpu: Fix incorrect filenames in sysfs comments

This looks like a standard copy/paste mistake. Replace the incorrect
serial_number references with product_name and product_model

Signed-off-by: kent.russ...@amd.com 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a10b627c8357..5a97021bbb23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,7 +162,7 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev);
  *
  * The amdgpu driver provides a sysfs API for reporting the product name
  * for the device
- * The file serial_number is used for this and returns the product name
+ * The file product_name is used for this and returns the product name
  * as returned from the FRU.
  * NOTE: This is only available for certain server cards
  */
@@ -184,7 +184,7 @@ static DEVICE_ATTR(product_name, S_IRUGO,
  *
  * The amdgpu driver provides a sysfs API for reporting the part number
  * for the device
- * The file serial_number is used for this and returns the part number
+ * The file product_number is used for this and returns the part number
  * as returned from the FRU.
  * NOTE: This is only available for certain server cards
  */
-- 
2.34.1


[PATCH] drm: Rename headers to match DP2.1 spec

2023-02-06 Thread jdhillon
This patch changes the headers defined in drm_dp.h to match
the DP 2.1 spec.

Signed-off-by: Jasdeep Dhillon 
---
 drivers/gpu/drm/tegra/dp.c   |  2 +-
 include/drm/display/drm_dp.h | 13 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 08fbd8f151a1..f33e468ece0a 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -499,7 +499,7 @@ static int drm_dp_link_apply_training(struct drm_dp_link 
*link)
for (i = 0; i < lanes; i++)
values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]);
 
-   err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values,
+   err = drm_dp_dpcd_write(aux, DP_LINK_SQUARE_PATTERN, values,
DIV_ROUND_UP(lanes, 2));
if (err < 0) {
DRM_ERROR("failed to set post-cursor: %d\n", err);
diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index ed10e6b6f99d..2093c1f8d8e0 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -641,12 +641,11 @@
 # define DP_LINK_QUAL_PATTERN_CUSTOM0x40
 # define DP_LINK_QUAL_PATTERN_SQUARE0x48
 
-#define DP_TRAINING_LANE0_1_SET2   0x10f
-#define DP_TRAINING_LANE2_3_SET2   0x110
-# define DP_LANE02_POST_CURSOR2_SET_MASK(3 << 0)
-# define DP_LANE02_MAX_POST_CURSOR2_REACHED (1 << 2)
-# define DP_LANE13_POST_CURSOR2_SET_MASK(3 << 4)
-# define DP_LANE13_MAX_POST_CURSOR2_REACHED (1 << 6)
+#define DP_LINK_SQUARE_PATTERN 0x10f
+#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX0x110
+# define DP_UHBR10_20_CAPABILITY   (3 << 0)
+# define DP_UHBR13_5_CAPABILITY(1 << 2)
+# define DP_CABLE_TYPE (7 << 3)
 
 #define DP_MSTM_CTRL   0x111   /* 1.2 */
 # define DP_MST_EN (1 << 0)
@@ -1127,6 +1126,8 @@
 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_32_MS 0x05
 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_64_MS 0x06
 
+#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPRX		0x2217 /* 2.0 */
+
 #define DP_TEST_264BIT_CUSTOM_PATTERN_7_0  0x2230
 #define DP_TEST_264BIT_CUSTOM_PATTERN_263_256  0x2250
 
-- 
2.34.1



[PATCH] [SUBMITTED 20210927] [RESEND^2] drm/amdgpu: fix enum odm_combine_mode mismatch

2023-02-06 Thread Arnd Bergmann
From: Arnd Bergmann 

A conversion from 'bool' to 'enum odm_combine_mode' was incomplete,
and gcc warns about this with many instances of

display/dc/dml/dcn20/display_mode_vba_20.c:3899:44: warning: implicit 
conversion from 'enum ' to 'enum
odm_combine_mode' [-Wenum-conversion]
 3899 | locals->ODMCombineEnablePerState[i][k] = false;

Change the ones that we get a warning for, using the same numerical
values to leave the behavior unchanged.

Fixes: 5fc11598166d ("drm/amd/display: expand dml structs")
Link: https://lore.kernel.org/all/20201026210039.3884312-3-a...@kernel.org/
Link: https://lore.kernel.org/all/20210927100659.1431744-1-a...@kernel.org/
Signed-off-by: Arnd Bergmann 
---
I sent this in 2020 and in 2021, but never got a reply and the warning
is still there.
---
 .../amd/display/dc/dml/dcn20/display_mode_vba_20.c   |  8 
 .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 10 +-
 .../amd/display/dc/dml/dcn21/display_mode_vba_21.c   | 12 ++--
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index f34bc3c8da41..69c41e3e3ba2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -3901,14 +3901,14 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l

mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] 
/ 2
* (1 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
 
-   locals->ODMCombineEnablePerState[i][k] = false;
+   locals->ODMCombineEnablePerState[i][k] = 
dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
if (mode_lib->vba.ODMCapability) {
if 
(locals->PlaneRequiredDISPCLKWithoutODMCombine > 
mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) {
-   
locals->ODMCombineEnablePerState[i][k] = true;
+   
locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;

mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
} else if (locals->HActive[k] > 
DCN20_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
-   
locals->ODMCombineEnablePerState[i][k] = true;
+   
locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;

mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
}
}
@@ -3961,7 +3961,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l
locals->RequiredDISPCLK[i][j] = 0.0;
locals->DISPCLK_DPPCLK_Support[i][j] = true;
for (k = 0; k <= 
mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-   locals->ODMCombineEnablePerState[i][k] 
= false;
+   locals->ODMCombineEnablePerState[i][k] 
= dm_odm_combine_mode_disabled;
if (locals->SwathWidthYSingleDPP[k] <= 
locals->MaximumSwathWidth[k]) {
locals->NoOfDPP[i][j][k] = 1;
locals->RequiredDPPCLK[i][j][k] 
= locals->MinDPPCLKUsingSingleDPP[k]
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 366138df0fe2..f475a0ae946c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -4012,17 +4012,17 @@ void 
dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode

mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] 
/ 2
* (1 + 
mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
 
-   locals->ODMCombineEnablePerState[i][k] = false;
+   locals->ODMCombineEnablePerState[i][k] = 
dm_odm_combine_mode_disabled;
mode_lib->vba.PlaneRequiredDISPCLK = 
mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCo

Re: [PATCH] drm/amdgpu: Fix potential race processing vm->freed

2023-02-06 Thread Christian König

Am 06.02.23 um 19:21 schrieb Rob Clark:

On Mon, Feb 6, 2023 at 8:05 AM Christian König  wrote:

Am 06.02.23 um 16:52 schrieb Rob Clark:

On Mon, Feb 6, 2023 at 2:15 AM Christian König  wrote:

Am 03.02.23 um 19:10 schrieb Rob Clark:

From: Rob Clark 

If userspace calls the AMDGPU_CS ioctl from multiple threads, because
the vm is global to the drm_file, you can end up with multiple threads
racing in amdgpu_vm_clear_freed().  So the freed list should be
protected with the status_lock, similar to other vm lists.

Well this is nonsense. To process the freed list the VM root PD lock
must be held anyway.

If we have a call path where this isn't true then we have a major bug at
a different place here.

I'm not super familiar w/ the amdgpu cs parser stuff, but the only
thing that I'm seeing that protects things is the bo_list_mutex and it
isn't clear to me that this is 1:1 with the vm (it looks like it is
not).

Do you have a backtrace?

Take a look at the reservation object of vm->root.bo. This should always
be locked first before doing *anything* in a CS.

If that isn't the case we have a much worse problem.

In this case, maybe a dma_resv_assert_held() would be a good idea?


We should already have that. Which makes me really wonder what the heck 
is going on here.


Christian.



BR,
-R


(I cc'd you on the bug report, jfyi)

I unfortunately only get a permission denied when I try to access that one.

Regards,
Christian.


BR,
-R


Regards,
Christian.


Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
Signed-off-by: Rob Clark 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++
1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b9441ab457ea..aeed7bc1512f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1240,10 +1240,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+ struct list_head freed;
int r;

- while (!list_empty(&vm->freed)) {
- mapping = list_first_entry(&vm->freed,
+ /*
+  * Move the contents of the VM's freed list to a local list
+  * that we can iterate without racing against other threads:
+  */
+ spin_lock(&vm->status_lock);
+ list_replace_init(&vm->freed, &freed);
+ spin_unlock(&vm->status_lock);
+
+ while (!list_empty(&freed)) {
+ mapping = list_first_entry(&freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);

@@ -1258,6 +1267,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
+
+ /*
+  * Move any unprocessed mappings back to the freed
+  * list:
+  */
+ spin_lock(&vm->status_lock);
+ list_splice_tail(&freed, &vm->freed);
+ spin_unlock(&vm->status_lock);
+
return r;
}
}
@@ -1583,11 +1601,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
mapping->bo_va = NULL;
trace_amdgpu_vm_bo_unmap(bo_va, mapping);

- if (valid)
+ if (valid) {
+ spin_lock(&vm->status_lock);
list_add(&mapping->list, &vm->freed);
- else
+ spin_unlock(&vm->status_lock);
+ } else {
amdgpu_vm_free_mapping(adev, vm, mapping,
   bo_va->last_pt_update);
+ }

return 0;
}
@@ -1671,7 +1692,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
tmp->last = eaddr;

tmp->bo_va = NULL;
+ spin_lock(&vm->status_lock);
list_add(&tmp->list, &vm->freed);
+ spin_unlock(&vm->status_lock);
trace_amdgpu_vm_bo_unmap(NULL, tmp);
}

@@ -1788,7 +1811,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ spin_lock(&vm->status_lock);
list_add(&mapping->list, &vm->freed);
+ spin_unlock(&vm->status_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);




Re: [PATCH] drm/amdgpu: Fix potential race processing vm->freed

2023-02-06 Thread Rob Clark
On Mon, Feb 6, 2023 at 8:05 AM Christian König  wrote:
>
> Am 06.02.23 um 16:52 schrieb Rob Clark:
> > On Mon, Feb 6, 2023 at 2:15 AM Christian König  
> > wrote:
> >> Am 03.02.23 um 19:10 schrieb Rob Clark:
> >>> From: Rob Clark 
> >>>
> >>> If userspace calls the AMDGPU_CS ioctl from multiple threads, because
> >>> the vm is global to the drm_file, you can end up with multiple threads
> >>> racing in amdgpu_vm_clear_freed().  So the freed list should be
> >>> protected with the status_lock, similar to other vm lists.
> >> Well this is nonsense. To process the freed list the VM root PD lock
> >> must be held anyway.
> >>
> >> If we have a call path where this isn't true then we have a major bug at
> >> a different place here.
> > I'm not super familiar w/ the amdgpu cs parser stuff, but the only
> > thing that I'm seeing that protects things is the bo_list_mutex and it
> > isn't clear to me that this is 1:1 with the vm (it looks like it is
> > not).
>
> Do you have a backtrace?
>
> Take a look at the reservation object of vm->root.bo. This should always
> be locked first before doing *anything* in a CS.
>
> If that isn't the case we have a much worse problem.

In this case, maybe a dma_resv_assert_held() would be a good idea?
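For reference, a minimal sketch of such an assertion (placement and the exact
bo dereference are assumptions, not a tested patch; it would live in
amdgpu_vm.c where these types are already available):

/* Hypothetical sketch: assert the VM root PD reservation is held before
 * walking vm->freed. */
static void amdgpu_vm_assert_root_held(struct amdgpu_vm *vm)
{
	/* CS paths are expected to lock the root PD's reservation first. */
	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}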

BR,
-R

> > (I cc'd you on the bug report, jfyi)
>
> I unfortunately only get a permission denied when I try to access that one.
>
> Regards,
> Christian.
>
> >
> > BR,
> > -R
> >
> >> Regards,
> >> Christian.
> >>
> >>> Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
> >>> Signed-off-by: Rob Clark 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++
> >>>1 file changed, 29 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> index b9441ab457ea..aeed7bc1512f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> @@ -1240,10 +1240,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device 
> >>> *adev,
> >>>struct amdgpu_bo_va_mapping *mapping;
> >>>uint64_t init_pte_value = 0;
> >>>struct dma_fence *f = NULL;
> >>> + struct list_head freed;
> >>>int r;
> >>>
> >>> - while (!list_empty(&vm->freed)) {
> >>> - mapping = list_first_entry(&vm->freed,
> >>> + /*
> >>> +  * Move the contents of the VM's freed list to a local list
> >>> +  * that we can iterate without racing against other threads:
> >>> +  */
> >>> + spin_lock(&vm->status_lock);
> >>> + list_replace_init(&vm->freed, &freed);
> >>> + spin_unlock(&vm->status_lock);
> >>> +
> >>> + while (!list_empty(&freed)) {
> >>> + mapping = list_first_entry(&freed,
> >>>struct amdgpu_bo_va_mapping, list);
> >>>list_del(&mapping->list);
> >>>
> >>> @@ -1258,6 +1267,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device 
> >>> *adev,
> >>>amdgpu_vm_free_mapping(adev, vm, mapping, f);
> >>>if (r) {
> >>>dma_fence_put(f);
> >>> +
> >>> + /*
> >>> +  * Move any unprocessed mappings back to the freed
> >>> +  * list:
> >>> +  */
> >>> + spin_lock(&vm->status_lock);
> >>> + list_splice_tail(&freed, &vm->freed);
> >>> + spin_unlock(&vm->status_lock);
> >>> +
> >>>return r;
> >>>}
> >>>}
> >>> @@ -1583,11 +1601,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> >>>mapping->bo_va = NULL;
> >>>trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> >>>
> >>> - if (valid)
> >>> + if (valid) {
> >>> + spin_lock(&vm->status_lock);
> >>>list_add(&mapping->list, &vm->freed);
> >>> - else
> >>> + spin_unlock(&vm->status_lock);
> >>> + } else {
> >>>amdgpu_vm_free_mapping(adev, vm, mapping,
> >>>   bo_va->last_pt_update);
> >>> + }
> >>>
> >>>return 0;
> >>>}
> >>> @@ -1671,7 +1692,9 @@ int amdgpu_vm_bo_clear_mappings(struct 
> >>> amdgpu_device *adev,
> >>>tmp->last = eaddr;
> >>>
> >>>tmp->bo_va = NULL;
> >>> + spin_lock(&vm->status_lock);
> >>>list_add(&tmp->list, &vm->freed);
> >>> + spin_unlock(&vm->status_lock);
> >>>trace_amdgpu_vm_bo_unmap(NULL, tmp);
> >>>}
> >>>
> >>> @@ -1788,7 +1811,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> >>>amdgpu_vm_it_remove(mapping, &vm->va);
> >>>mapping->bo_va = NULL;
> >>>trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> >>> + spin_lock(&vm->status_lock);
> >>>list_add(&mapping->list, &vm->freed);
> >>> + spin_unlock(&vm->s

[linux-next:master] BUILD REGRESSION 129af770823407ee115a56c69a04b440fd2fbe61

2023-02-06 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 129af770823407ee115a56c69a04b440fd2fbe61  Add linux-next specific 
files for 20230206

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202301230743.xnut0zvc-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202301300743.bp7dpazv-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202301301801.y5o08tqx-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202301302110.metnwkbd-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302011836.ka3bxqdy-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302061911.c7xvhx9v-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302062223.7f7gv80m-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202302062224.byzetxh1-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Documentation/riscv/uabi.rst:24: WARNING: Enumerated list ends without a blank 
line; unexpected unindent.
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/fsl-edma.ko] 
undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/idma64.ko] 
undefined!
FAILED: load BTF from vmlinux: No data available
arch/arm64/kvm/arm.c:2207: warning: expecting prototype for Initialize Hyp(). 
Prototype was for kvm_arm_init() instead
drivers/clk/qcom/gcc-sa8775p.c:313:32: warning: unused variable 
'gcc_parent_map_10' [-Wunused-const-variable]
drivers/clk/qcom/gcc-sa8775p.c:318:37: warning: unused variable 
'gcc_parent_data_10' [-Wunused-const-variable]
drivers/clk/qcom/gcc-sa8775p.c:333:32: warning: unused variable 
'gcc_parent_map_12' [-Wunused-const-variable]
drivers/clk/qcom/gcc-sa8775p.c:338:37: warning: unused variable 
'gcc_parent_data_12' [-Wunused-const-variable]
drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.h:62:10: fatal error: 
mod_info_packet.h: No such file or directory
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_hubbub.c:1011:6: warning: 
no previous prototype for 'hubbub31_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubbub.c:948:6: warning: 
no previous prototype for 'hubbub32_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hubp.c:158:6: warning: no 
previous prototype for 'hubp32_init' [-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource_helpers.c:62:18: 
warning: variable 'cursor_bpp' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/accessories/link_dp_trace.c:148:6:
 warning: no previous prototype for 'link_dp_trace_set_edp_power_timestamp' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/accessories/link_dp_trace.c:158:10:
 warning: no previous prototype for 'link_dp_trace_get_edp_poweron_timestamp' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/accessories/link_dp_trace.c:163:10:
 warning: no previous prototype for 'link_dp_trace_get_edp_poweroff_timestamp' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:1295:32:
 warning: variable 'result_write_min_hblank' set but not used 
[-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:279:42:
 warning: variable 'ds_port' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training.c:1585:38:
 warning: variable 'result' set but not used [-Wunused-but-set-variable]
libbpf: failed to find '.BTF' ELF section in vmlinux

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/thermal/qcom/tsens-v0_1.c:106:40: sparse: sparse: symbol 
'tsens_9607_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v0_1.c:26:40: sparse: sparse: symbol 
'tsens_8916_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v0_1.c:42:40: sparse: sparse: symbol 
'tsens_8939_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v0_1.c:62:40: sparse: sparse: symbol 
'tsens_8974_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v0_1.c:84:40: sparse: sparse: symbol 
'tsens_8974_backup_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v1.c:24:40: sparse: sparse: symbol 
'tsens_qcs404_nvmem' was not declared. Should it be static?
drivers/thermal/qcom/tsens-v1.c:45:40: sparse: sparse: symbol 
'tsens_8976_nvmem' was not declared. Should it be static?

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-randconfig-r022-20230205
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-amdgpu_dm-amdgpu_dm.h:fatal-error:mod_info_packet.h:No-such-file-or-directory
|-- alpha-randc

Re: [PATCH] drm/amd/display: fix cursor offset on rotation 180

2023-02-06 Thread Hamza Mahfooz

On 1/31/23 11:05, Melissa Wen wrote:

Cursor gets clipped off in the middle of the screen with hw rotation
180. Fix a miscalculation of cursor offset when it's placed near the
edges in the pipe split case.

Cursor bugs with hw rotation were reported on AMD issue tracker:
https://gitlab.freedesktop.org/drm/amd/-/issues/2247

The issue on rotation 270 was fixed by:
https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-brian.ch...@amd.com/
which partially addressed rotation 180 too. So, this patch adds the
final bits for rotation 180.

Reported-by: Xaver Hugl 
Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal 
mirror")
Signed-off-by: Melissa Wen 


Applied, thanks!


---
  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index bb155734ac93..480c0b3b51fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -3624,7 +3624,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) {
-   pos_cpy.x = temp_x + viewport_width;
+   pos_cpy.x = 2 * viewport_width - temp_x;
}
}
} else {


--
Hamza



[PATCH 2/2] drm/amdgpu: Add unique_id support for GC 11.0.1/2

2023-02-06 Thread kent.russ...@amd.com
These ASICs can support unique_id, so create the sysfs file for them.

Signed-off-by: kent.russ...@amd.com 
---
 drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index bd060697d982..bf6d63673b5a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1991,6 +1991,8 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
case IP_VERSION(9, 4, 2):
case IP_VERSION(10, 3, 0):
case IP_VERSION(11, 0, 0):
+   case IP_VERSION(11, 0, 1):
+   case IP_VERSION(11, 0, 2):
*states = ATTR_STATE_SUPPORTED;
break;
default:
-- 
2.34.1



[PATCH 1/2] drm/amdgpu: Fix incorrect filenames in sysfs comments

2023-02-06 Thread kent.russ...@amd.com
This looks like a standard copy/paste mistake. Replace the incorrect
serial_number references with product_name and product_number.

Signed-off-by: kent.russ...@amd.com 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a10b627c8357..5a97021bbb23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,7 +162,7 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev);
  *
  * The amdgpu driver provides a sysfs API for reporting the product name
  * for the device
- * The file serial_number is used for this and returns the product name
+ * The file product_name is used for this and returns the product name
  * as returned from the FRU.
  * NOTE: This is only available for certain server cards
  */
@@ -184,7 +184,7 @@ static DEVICE_ATTR(product_name, S_IRUGO,
  *
  * The amdgpu driver provides a sysfs API for reporting the part number
  * for the device
- * The file serial_number is used for this and returns the part number
+ * The file product_number is used for this and returns the part number
  * as returned from the FRU.
  * NOTE: This is only available for certain server cards
  */
-- 
2.34.1



Re: [PATCH] drm/amd/display: fix cursor offset on rotation 180

2023-02-06 Thread Harry Wentland



On 1/31/23 11:05, Melissa Wen wrote:
> Cursor gets clipped off in the middle of the screen with hw rotation
> 180. Fix a miscalculation of cursor offset when it's placed near the
> edges in the pipe split case.
> 
> Cursor bugs with hw rotation were reported on AMD issue tracker:
> https://gitlab.freedesktop.org/drm/amd/-/issues/2247
> 
> The issue on rotation 270 was fixed by:
> https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-brian.ch...@amd.com/
> which partially addressed rotation 180 too. So, this patch adds the
> final bits for rotation 180.
> 
> Reported-by: Xaver Hugl 
> Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal 
> mirror")
> Signed-off-by: Melissa Wen 

Reviewed-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
> b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
> index bb155734ac93..480c0b3b51fc 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
> @@ -3624,7 +3624,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
>   (int)hubp->curs_attr.width || pos_cpy.x
>   <= (int)hubp->curs_attr.width +
>   pipe_ctx->plane_state->src_rect.x) {
> - pos_cpy.x = temp_x + viewport_width;
> + pos_cpy.x = 2 * viewport_width - temp_x;
>   }
>   }
>   } else {



Re: [PATCH 14/14] drm/amdgpu: introduce doorbell bo in kernel

2023-02-06 Thread Shashank Sharma



On 06/02/2023 17:57, Christian König wrote:

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch does the following:
- Removes doorbell ptr from adev.
- Moves doorbell.num_doorbells into mman.num_doorbells.
- Introduces a kernel bo for doorbell management in the form of
  mman.doorbell_kernel_bo. This bo now holds the doorbell space.
- Introduces mman.doorbell_bo_size to save the total doorbell memory size.
- Also introduces mman.doorbell_va to save the CPU address of the doorbell BO
  mapping. This pointer is now used for doorbell reads/writes from the
  doorbell BAR.


Please don't call any CPU address a VA; that is a term we usually use
with GPUVM.


Rather use doorbell_cpu_addr for this.

Christian.


Noted,

- Shashank





Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 16 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 24 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  6 +
  6 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index e1c1a360614e..dd3875ae1ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -834,7 +834,6 @@ struct amdgpu_device {
  spinlock_t audio_endpt_idx_lock;
  amdgpu_block_rreg_t    audio_endpt_rreg;
  amdgpu_block_wreg_t    audio_endpt_wreg;
-    struct amdgpu_doorbell    doorbell;
    /* clock/pll info */
  struct amdgpu_clock    clock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 28076da2258f..aea943e337df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,

  *aperture_base = adev->gmc.doorbell_aper_base;
  *aperture_size = 0;
  *start_offset = 0;
-    } else if (adev->gmc.doorbell_aper_size > 
adev->doorbell.num_doorbells *

-    sizeof(u32)) {
+    } else if (adev->gmc.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {

  *aperture_base = adev->gmc.doorbell_aper_base;
  *aperture_size = adev->gmc.doorbell_aper_size;
-    *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+    *start_offset = adev->mman.doorbell_kernel_bo_size;
  } else {
  *aperture_base = 0;
  *aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 7c21ffe63ebc..9c3ce1ae66ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -593,8 +593,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return readl(adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_doorbells) {
+    return readl(adev->mman.doorbell_va + index);
  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -616,8 +616,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device 
*adev, u32 index, u32 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+    if (index < adev->mman.num_doorbells) {
+    writel(v, adev->mman.doorbell_va + index);
  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
@@ -637,8 +637,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device 
*adev, u32 index)

  if (amdgpu_device_skip_hw_access(adev))
  return 0;
  -    if (index < adev->doorbell.num_doorbells) {
-    return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));

+    if (index < adev->mman.num_doorbells) {
+    return atomic64_read((atomic64_t *)(adev->mman.doorbell_va + 
index));

  } else {
  DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", 
index);

  return 0;
@@ -660,8 +660,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device 
*adev, u32 index, u64 v)

  if (amdgpu_device_skip_hw_access(adev))
  return;
  -    if (index < adev->doorbell.num_doorbells) {
-    atomic64_set((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index), v);

+    if (index < adev->mman.num_doorbells) {
+    atomic64_set((atomic64_t *)(adev->mman.doorbell_va + index), 
v);

  } else {
  DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", 
index);

  }
diff --git a/drivers/gpu

Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-06 Thread Harry Wentland



On 2/6/23 04:47, Ville Syrjälä wrote:
> On Sat, Feb 04, 2023 at 06:09:45AM +, Joshua Ashton wrote:
>>
>>
>> On 2/3/23 19:34, Ville Syrjälä wrote:
>>> On Fri, Feb 03, 2023 at 09:25:38PM +0200, Ville Syrjälä wrote:
 On Fri, Feb 03, 2023 at 08:56:55PM +0200, Ville Syrjälä wrote:
> On Fri, Feb 03, 2023 at 01:28:20PM -0500, Harry Wentland wrote:
>>
>>
>> On 2/3/23 11:00, Ville Syrjälä wrote:
>>> On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:


 On 2/3/23 10:19, Ville Syrjälä wrote:
> On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
>>
>>
>> On 2/3/23 07:59, Sebastian Wick wrote:
>>> On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
>>>  wrote:

 On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> Userspace has no way of controlling or knowing the pixel encoding
> currently, so there is no way for it to ever get the right values 
> here.

 That applies to a lot of the other values as well (they are
 explicitly RGB or YCC). The idea was that this property sets the
 infoframe/MSA/SDP value exactly, and other properties should be
 added for userspace to control the pixel encoding/colorspace
 conversion (if desired, or userspace just makes sure to
 directly feed in the correct kind of data).
>>>
>>> I'm all for getting userspace control over pixel encoding but even
>>> then the kernel always knows which pixel encoding is selected and
>>> which InfoFrame has to be sent. Is there a reason why userspace 
>>> would
>>> want to control the variant explicitly to the wrong value?
>>>
>>
>> I've asked this before but haven't seen an answer: Is there an 
>> existing
>> upstream userspace project that makes use of this property (other 
>> than
>> what Joshua is working on in gamescope right now)? That would help us
>> understand the intent better.
>
> The intent was to control the infoframe colorimetry bits,
> nothing more. No idea what real userspace there was, if any.
>>
>> Controlling the infoframe alone isn't useful at all unless you can 
>> guarantee the wire encoding, which we cannot do.
>>
>
>>
>> I don't think giving userspace explicit control over the exact 
>> infoframe
>> values is the right thing to do.
>>
>> +1
>>
>
> Only userspace knows what kind of data it's stuffing into
> the pixels (and/or how it configures the csc units/etc.) to
> generate them.
>

 Yes, but userspace doesn't control or know whether we drive
 RGB or YCbCr on the wire. In fact, in some cases our driver
 needs to fallback to YCbCr420 for bandwidth reasons. There
 is currently no way for userspace to know that and I don't
 think it makes sense.
>>>
>>> People want that control as well for whatever reason. We've
>>> been asked to allow YCbCr 4:4:4 output many times.
>>>
>>> The automagic 4:2:0 fallback I think is rather fundamentally
>>> incompatible with fancy color management. How would we even
>>> know whether to use eg. BT.2020 vs. BT.709 matrix? In i915
>>> that stuff is just always BT.709 limited range, no questions
>>> asked.
>>
>> That's what the Colorspace property *should* be determining here.
>> That's what we have it set up to do in SteamOS/my tree right now.
>>
>>>
>>
>> We use what we're telling the display, i.e., the value in the
>> colorspace property. That way we know whether to use a BT.2020
>> or BT.709 matrix.
>
> And given how these things have gone in the past, I think
> that is likely to bite someone in the future. It's also not
> what this property was meant to do, nor what it does on any other
> driver AFAIK.
>
>> I don't see how it's fundamentally incompatible with fancy
>> color management stuff.
>>
>> If we start forbidding drivers from falling back to YCbCr
>> (whether 4:4:4 or 4:2:0) we will break existing behavior on
>> amdgpu and will see bug reports.
>
> The compositors could deal with that if/when they start doing
> the full color management stuff. The current stuff only really
> works when the kernel is allowed to do whatever it wants.
>
>>
>>> So I think if userspace wants real color management it's
>>> going to have to set up the whole pipeline. And for that
>>> we need at least one new property to control the RGB->YCbCr
>>> conversion (or to explicitly avoid it).
>>
>> I mentioned this in my commit description; we absolutely should offer
>> fine control here eventually.
>>
>> I don't think we need to solve that problem here though.
>
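
For reference, the compositor side of this is just an ordinary property
write today. A minimal sketch with libdrm that looks up the "Colorspace"
property and the desired enum entry by name (the enum string, e.g.
"BT2020_YCC" or "BT2020_RGB", depends on the kernel version and is an
assumption here):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Set the connector "Colorspace" property to the named enum value. */
static int set_colorspace(int fd, uint32_t connector_id, const char *value_name)
{
	drmModeObjectProperties *props =
		drmModeObjectGetProperties(fd, connector_id,
					   DRM_MODE_OBJECT_CONNECTOR);
	int ret = -1;

	for (uint32_t i = 0; props && i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		if (prop && !strcmp(prop->name, "Colorspace")) {
			for (int j = 0; j < prop->count_enums; j++) {
				if (!strcmp(prop->enums[j].name, value_name)) {
					ret = drmModeObjectSetProperty(fd,
						connector_id,
						DRM_MODE_OBJECT_CONNECTOR,
						prop->prop_id,
						prop->enums[j].value);
					break;
				}
			}
		}
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);
	return ret;
}

An atomic compositor would queue the same property via
drmModeAtomicAddProperty() instead of the legacy setter.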

[PATCH] drm/amd/pm/smu7: move variables to where they are used

2023-02-06 Thread Alex Deucher
Move variable declarations to where they are used.  Fixes
a segfault on smu7 V0 structures where some tables don't
exist.

Cc: Evan Quan 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2388
Fixes: 711d3c39503b ("drm/amd/pm: fulfill powerplay peak profiling mode 
shader/memory clock settings")
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c| 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 89fc32318d80..e10cc5e7928e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -1504,12 +1504,6 @@ static void smu7_populate_umdpstate_clocks(struct 
pp_hwmgr *hwmgr)
 {
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
struct smu7_dpm_table *golden_dpm_table = &data->golden_dpm_table;
-   struct phm_clock_voltage_dependency_table *vddc_dependency_on_sclk =
-   hwmgr->dyn_state.vddc_dependency_on_sclk;
-   struct phm_ppt_v1_information *table_info =
-   (struct phm_ppt_v1_information *)(hwmgr->pptable);
-   struct phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_on_sclk =
-   table_info->vdd_dep_on_sclk;
int32_t tmp_sclk, count, percentage;
 
if (golden_dpm_table->mclk_table.count == 1) {
@@ -1524,6 +1518,9 @@ static void smu7_populate_umdpstate_clocks(struct 
pp_hwmgr *hwmgr)
tmp_sclk = hwmgr->pstate_mclk * percentage / 100;
 
if (hwmgr->pp_table_version == PP_TABLE_V0) {
+   struct phm_clock_voltage_dependency_table 
*vddc_dependency_on_sclk =
+   hwmgr->dyn_state.vddc_dependency_on_sclk;
+
for (count = vddc_dependency_on_sclk->count - 1; count >= 0; 
count--) {
if (tmp_sclk >= 
vddc_dependency_on_sclk->entries[count].clk) {
hwmgr->pstate_sclk = 
vddc_dependency_on_sclk->entries[count].clk;
@@ -1536,6 +1533,11 @@ static void smu7_populate_umdpstate_clocks(struct 
pp_hwmgr *hwmgr)
hwmgr->pstate_sclk_peak =

vddc_dependency_on_sclk->entries[vddc_dependency_on_sclk->count - 1].clk;
} else if (hwmgr->pp_table_version == PP_TABLE_V1) {
+   struct phm_ppt_v1_information *table_info =
+   (struct phm_ppt_v1_information *)(hwmgr->pptable);
+   struct phm_ppt_v1_clock_voltage_dependency_table 
*vdd_dep_on_sclk =
+   table_info->vdd_dep_on_sclk;
+
for (count = vdd_dep_on_sclk->count - 1; count >= 0; count--) {
if (tmp_sclk >= vdd_dep_on_sclk->entries[count].clk) {
hwmgr->pstate_sclk = 
vdd_dep_on_sclk->entries[count].clk;
-- 
2.39.1



Re: [PATCH 12/13] drm/amdgpu: add domain info in bo_create_kernel_at

2023-02-06 Thread Christian König

Am 06.02.23 um 18:01 schrieb Alex Deucher:

On Mon, Feb 6, 2023 at 11:51 AM Christian König
 wrote:

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds a domain input variable for amdgpu_bo_create_kernel_at,
so that it can be used for both VRAM and DOORBELL domain objects. It
also adds supporting code for existing callers.

We should probably drop this one as well.

We just removed the domain from the function because we only have BIOS
reserved regions in VRAM, never anywhere else.

Allocating a doorbell for the kernel is not really a use case for
amdgpu_bo_create_kernel_at().

We just need some way to guarantee that the kernel always gets the
first page.  It's required for SR-IOV compatibility.


That should be guaranteed when we use ttm_range_manager() since that one 
gives you pages in the order you allocate them.


If the first page is already taken then bo_create_kernel_at() won't help
either; you just get an error returned.


Just allocating and returning the error yourself does the same here.

Christian.
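
To spell out what that could look like at doorbell init time, as a minimal
sketch (AMDGPU_GEM_DOMAIN_DOORBELL is the new domain from this series; it
relies on the range manager handing out offsets in allocation order, so the
very first allocation has to land on page 0 or we bail out):

	struct amdgpu_bo *kernel_doorbells;
	void *cpu_addr;
	int r;

	/* first allocation after the doorbell manager comes up */
	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_DOORBELL,
				    &kernel_doorbells, NULL, &cpu_addr);
	if (r)
		return r;

	/* SR-IOV needs the kernel doorbells on the very first page */
	if (kernel_doorbells->tbo.resource->start != 0) {
		amdgpu_bo_free_kernel(&kernel_doorbells, NULL, &cpu_addr);
		return -ENOSPC;
	}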



Alex


Christian.


Signed-off-by: Shashank Sharma 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 1 +
   4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ef1f3106bc69..dec391fa42dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -367,7 +367,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* 0 on success, negative error code otherwise.
*/
   int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-uint64_t offset, uint64_t size,
+uint64_t offset, uint64_t size, uint32_t domain,
  struct amdgpu_bo **bo_ptr, void **cpu_addr)
   {
   struct ttm_operation_ctx ctx = { false, false };
@@ -378,7 +378,7 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
   size = ALIGN(size, PAGE_SIZE);

   r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
-   AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+   domain, bo_ptr, NULL,
 cpu_addr);
   if (r)
   return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index bf9759758f0d..b2b7e55ac486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
   u32 domain, struct amdgpu_bo **bo_ptr,
   u64 *gpu_addr, void **cpu_addr);
   int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-uint64_t offset, uint64_t size,
+uint64_t offset, uint64_t size, uint32_t domain,
  struct amdgpu_bo **bo_ptr, void **cpu_addr);
   int amdgpu_bo_create_user(struct amdgpu_device *adev,
 struct amdgpu_bo_param *bp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 08355f981313..4cec90debe46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1591,6 +1591,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct 
amdgpu_device *adev)
   return amdgpu_bo_create_kernel_at(adev,
 adev->mman.fw_vram_usage_start_offset,
 adev->mman.fw_vram_usage_size,
+   AMDGPU_GEM_DOMAIN_VRAM,
 &adev->mman.fw_vram_usage_reserved_bo,
 &adev->mman.fw_vram_usage_va);
   }
@@ -1616,6 +1617,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct 
amdgpu_device *adev)
   return amdgpu_bo_create_kernel_at(adev,
 adev->mman.drv_vram_usage_start_offset,
 adev->mman.drv_vram_usage_size,
+   AMDGPU_GEM_DOMAIN_VRAM,
 &adev->mman.drv_vram_usage_reserved_bo,
 &adev->mman.drv_vram_usage_va);
   }
@@ -1696,6 +1698,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
   ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
+  AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
  

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Christian König

Am 06.02.23 um 17:56 schrieb Alex Deucher:

On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:

Hey Alex,

On 03/02/2023 23:07, Alex Deucher wrote:

On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  wrote:

From: Alex Deucher 

This patch introduces a new UAPI/IOCTL for usermode graphics
queues. The userspace app fills this structure and requests that
the graphics driver add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into the GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
   include/uapi/drm/amdgpu_drm.h | 53 +++
   1 file changed, 53 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..6c5235d107b3 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
   #define DRM_AMDGPU_VM  0x13
   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
   #define DRM_AMDGPU_SCHED   0x15
+#define DRM_AMDGPU_USERQ   0x16

   #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
   #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
   #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

   /**
* DOC: memory domains
@@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
  union drm_amdgpu_ctx_out out;
   };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;

Since doorbells are 64 bit, maybe this offset should be in qwords.

Can you please help to cross-check this information? All the existing
kernel doorbell calculations keep the doorbell size as sizeof(u32).

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.


I think we can stick with using __u32 because this is *not* the size of 
the doorbell entries.


Instead, this is the offset into the BO at which to find the doorbell for
this queue (which in turn is 64 bits wide).


Since we will probably never have more than 4 GiB of doorbells, we should
be pretty safe using 32 bits here.


Christian.
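
Purely as an illustration of that layering (not actual driver code; the
doorbell BO CPU mapping name is made up here): with doorbell_offset kept as
a dword index into the BO, as posted, the 64-bit doorbell the queue rings
lives at:

	/* doorbell_offset from the UAPI is in dwords into the doorbell BO */
	u64 __iomem *db = (u64 __iomem *)
		((u32 __iomem *)doorbell_bo_cpu_addr + mqd->doorbell_offset);

	/* ringing the queue is a single 64-bit write of the new wptr */
	writeq(wptr, db);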



Alex


+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Queue handle to associate the queue free call with,
+* unused for queue create calls */
+   __u32   queue_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;

Maybe this should be queue_id to match the input.

Agree.

- Shashank


Alex


+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
   /* vm ioctl */
   #define AMDGPU_VM_OP_RESERVE_VMID  1
   #define AMDGPU_VM_OP_UNRESERVE_VMID2
--
2.34.1
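
For completeness, a rough sketch of how a userspace client might drive this
ioctl if it lands in the posted form (structure, defines and ioctl number
are taken from the patch above; the doorbell GEM handle and the GPU virtual
addresses are placeholders the application would have set up beforehand):

#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>	/* would carry the proposed additions */

static int create_user_queue(int fd, uint32_t doorbell_handle,
			     uint64_t queue_va, uint64_t queue_size,
			     uint64_t rptr_va, uint64_t wptr_va,
			     uint32_t *queue_id)
{
	union drm_amdgpu_userq args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_USERQ_OP_CREATE;
	args.in.mqd.ip_type = AMDGPU_HW_IP_GFX;
	args.in.mqd.doorbell_handle = doorbell_handle;
	args.in.mqd.doorbell_offset = 0;	/* first doorbell in the BO */
	args.in.mqd.queue_va = queue_va;
	args.in.mqd.queue_size = queue_size;
	args.in.mqd.rptr_va = rptr_va;
	args.in.mqd.wptr_va = wptr_va;

	r = drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args);
	if (r)
		return r;

	*queue_id = args.out.q_id;
	return 0;
}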





Re: [PATCH 12/13] drm/amdgpu: add domain info in bo_create_kernel_at

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 11:51 AM Christian König
 wrote:
>
> Am 03.02.23 um 20:08 schrieb Shashank Sharma:
> > From: Shashank Sharma 
> >
> > This patch adds a domain input variable for amdgpu_bo_create_kernel_at,
> > so that it can be used for both VRAM and DOORBELL domain objects. It
> > also adds supporting code for existing callers.
>
> We should probably drop this one as well.
>
> We just removed the domain from the function because we only have BIOS
> reserved regions in VRAM, never anywhere else.
>
> Allocating a doorbell for the kernel is not really a use case for
> amdgpu_bo_create_kernel_at().

We just need some way to guarantee that the kernel always gets the
first page.  It's required for SR-IOV compatibility.

Alex

>
> Christian.
>
> >
> > Signed-off-by: Shashank Sharma 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 1 +
> >   4 files changed, 11 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index ef1f3106bc69..dec391fa42dc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -367,7 +367,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> >* 0 on success, negative error code otherwise.
> >*/
> >   int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > -uint64_t offset, uint64_t size,
> > +uint64_t offset, uint64_t size, uint32_t 
> > domain,
> >  struct amdgpu_bo **bo_ptr, void **cpu_addr)
> >   {
> >   struct ttm_operation_ctx ctx = { false, false };
> > @@ -378,7 +378,7 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device 
> > *adev,
> >   size = ALIGN(size, PAGE_SIZE);
> >
> >   r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
> > -   AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
> > +   domain, bo_ptr, NULL,
> > cpu_addr);
> >   if (r)
> >   return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > index bf9759758f0d..b2b7e55ac486 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > @@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> >   u32 domain, struct amdgpu_bo **bo_ptr,
> >   u64 *gpu_addr, void **cpu_addr);
> >   int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > -uint64_t offset, uint64_t size,
> > +uint64_t offset, uint64_t size, uint32_t 
> > domain,
> >  struct amdgpu_bo **bo_ptr, void **cpu_addr);
> >   int amdgpu_bo_create_user(struct amdgpu_device *adev,
> > struct amdgpu_bo_param *bp,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 08355f981313..4cec90debe46 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1591,6 +1591,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct 
> > amdgpu_device *adev)
> >   return amdgpu_bo_create_kernel_at(adev,
> > 
> > adev->mman.fw_vram_usage_start_offset,
> > adev->mman.fw_vram_usage_size,
> > +   AMDGPU_GEM_DOMAIN_VRAM,
> > 
> > &adev->mman.fw_vram_usage_reserved_bo,
> > &adev->mman.fw_vram_usage_va);
> >   }
> > @@ -1616,6 +1617,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct 
> > amdgpu_device *adev)
> >   return amdgpu_bo_create_kernel_at(adev,
> > 
> > adev->mman.drv_vram_usage_start_offset,
> > adev->mman.drv_vram_usage_size,
> > +   AMDGPU_GEM_DOMAIN_VRAM,
> > 
> > &adev->mman.drv_vram_usage_reserved_bo,
> > &adev->mman.drv_vram_usage_va);
> >   }
> > @@ -1696,6 +1698,7 @@ static int amdgpu_ttm_reserve_tmr(struct 
> > amdgpu_device *adev)
> >   ret = amdgpu_bo_create_kernel_at(adev,
> >ctx->c2p_train_data_offset,
> >ctx->train_data_size,
> > +  AMDGPU_GEM_DOMAIN_VRAM,
> >&ctx->c2p_bo,
> >NULL);
>

Re: [PATCH 14/14] drm/amdgpu: introduce doorbell bo in kernel

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch does the following:
- Removes doorbell ptr from adev.
- Moves doorbell.num_doorbells into mman.num_doorbells.
- Introduces a kernel bo for doorbell management in the form of
  mman.doorbell_kernel_bo. This bo now holds the doorbell space.
- Introduces mman.doorbell_bo_size to save the total doorbell memory size.
- Also introduces mman.doorbell_va to save the CPU address of the doorbell BO
  mapping. This pointer is now used for doorbell reads/writes from the
  doorbell BAR.


Please don't call any CPU address a VA; that is a term we usually use with
GPUVM.


Rather use doorbell_cpu_addr for this.

Christian.



Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  5 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 16 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 24 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  6 +
  6 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e1c1a360614e..dd3875ae1ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -834,7 +834,6 @@ struct amdgpu_device {
spinlock_t audio_endpt_idx_lock;
amdgpu_block_rreg_t audio_endpt_rreg;
amdgpu_block_wreg_t audio_endpt_wreg;
-   struct amdgpu_doorbell  doorbell;
  
  	/* clock/pll info */

struct amdgpu_clockclock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 28076da2258f..aea943e337df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -109,11 +109,10 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
*aperture_base = adev->gmc.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->gmc.doorbell_aper_size > adev->doorbell.num_doorbells *
-   sizeof(u32)) {
+   } else if (adev->gmc.doorbell_aper_size > 
adev->mman.doorbell_kernel_bo_size) {
*aperture_base = adev->gmc.doorbell_aper_base;
*aperture_size = adev->gmc.doorbell_aper_size;
-   *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+   *start_offset = adev->mman.doorbell_kernel_bo_size;
} else {
*aperture_base = 0;
*aperture_size = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7c21ffe63ebc..9c3ce1ae66ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -593,8 +593,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
  
-	if (index < adev->doorbell.num_doorbells) {

-   return readl(adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_doorbells) {
+   return readl(adev->mman.doorbell_va + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -616,8 +616,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
if (amdgpu_device_skip_hw_access(adev))
return;
  
-	if (index < adev->doorbell.num_doorbells) {

-   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
+   if (index < adev->mman.num_doorbells) {
+   writel(v, adev->mman.doorbell_va + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -637,8 +637,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
if (amdgpu_device_skip_hw_access(adev))
return 0;
  
-	if (index < adev->doorbell.num_doorbells) {

-   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
+   if (index < adev->mman.num_doorbells) {
+   return atomic64_read((atomic64_t *)(adev->mman.doorbell_va + 
index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -660,8 +660,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
if (amdgpu_device_skip_hw_access(adev))
return;
  
-	if (index < adev->doorbell.num_doorbells) {

-   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
+   if (index < adev->mman.num_doorbells) {
+   atomic64_set((atomic64_t *)(adev->mman.d

Re: [PATCH 1/8] drm/amdgpu: UAPI for user queue management

2023-02-06 Thread Alex Deucher
On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma  wrote:
>
> Hey Alex,
>
> On 03/02/2023 23:07, Alex Deucher wrote:
> > On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma  
> > wrote:
> >> From: Alex Deucher 
> >>
> >> This patch introduces a new UAPI/IOCTL for usermode graphics
> >> queues. The userspace app fills this structure and requests that
> >> the graphics driver add a graphics work queue for it. The
> >> output of this UAPI is a queue id.
> >>
> >> This UAPI maps the queue into the GPU, so the graphics app can start
> >> submitting work to the queue as soon as the call returns.
> >>
> >> Cc: Alex Deucher 
> >> Cc: Christian Koenig 
> >> Signed-off-by: Alex Deucher 
> >> Signed-off-by: Shashank Sharma 
> >> ---
> >>   include/uapi/drm/amdgpu_drm.h | 53 +++
> >>   1 file changed, 53 insertions(+)
> >>
> >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >> index 4038abe8505a..6c5235d107b3 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -54,6 +54,7 @@ extern "C" {
> >>   #define DRM_AMDGPU_VM  0x13
> >>   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >>   #define DRM_AMDGPU_SCHED   0x15
> >> +#define DRM_AMDGPU_USERQ   0x16
> >>
> >>   #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>   #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >> @@ -71,6 +72,7 @@ extern "C" {
> >>   #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
> >> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>
> >>   /**
> >>* DOC: memory domains
> >> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
> >>  union drm_amdgpu_ctx_out out;
> >>   };
> >>
> >> +/* user queue IOCTL */
> >> +#define AMDGPU_USERQ_OP_CREATE 1
> >> +#define AMDGPU_USERQ_OP_FREE   2
> >> +
> >> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> >> +
> >> +struct drm_amdgpu_userq_mqd {
> >> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >> +   __u32   flags;
> >> +   /** IP type: AMDGPU_HW_IP_* */
> >> +   __u32   ip_type;
> >> +   /** GEM object handle */
> >> +   __u32   doorbell_handle;
> >> +   /** Doorbell offset in dwords */
> >> +   __u32   doorbell_offset;
> > Since doorbells are 64 bit, maybe this offset should be in qwords.
>
> Can you please help to cross-check this information? All the existing
> kernel doorbell calculations keep the doorbell size as sizeof(u32).

Doorbells on pre-vega hardware are 32 bits so that is where that comes
from, but from vega onward most doorbells are 64 bit.  I think some
versions of VCN may still use 32 bit doorbells.  Internally in the
kernel driver we just use two slots for newer hardware, but for the
UAPI, I think we can just stick with 64 bit slots to avoid confusion.
Even if an engine only uses a 32 bit one, I don't know that there is
much value to trying to support variable doorbell sizes.

Alex
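
To illustrate the "two slots" point: roughly what the gfx9+ ring setup does
today is double the logical index so each 64-bit doorbell spans two 32-bit
slots, while ringing it stays one 64-bit store (a sketch, not a verbatim
quote of the code):

	/* one 64-bit doorbell occupies two consecutive 32-bit slots */
	ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;

	/* updating the queue is still a single 64-bit write at that slot */
	amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);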

>
> >
> >> +   /** GPU virtual address of the queue */
> >> +   __u64   queue_va;
> >> +   /** Size of the queue in bytes */
> >> +   __u64   queue_size;
> >> +   /** GPU virtual address of the rptr */
> >> +   __u64   rptr_va;
> >> +   /** GPU virtual address of the wptr */
> >> +   __u64   wptr_va;
> >> +};
> >> +
> >> +struct drm_amdgpu_userq_in {
> >> +   /** AMDGPU_USERQ_OP_* */
> >> +   __u32   op;
> >> +   /** Flags */
> >> +   __u32   flags;
> >> +   /** Queue handle to associate the queue free call with,
> >> +* unused for queue create calls */
> >> +   __u32   queue_id;
> >> +   __u32   pad;
> >> +   /** Queue descriptor */
> >> +   struct drm_amdgpu_userq_mqd mqd;
> >> +};
> >> +
> >> +struct drm_amdgpu_userq_out {
> >> +   /** Queue handle */
> >> +   __u32   q_id;
> > Maybe this should be queue_id to match the input.
>
> Agree.
>
> - Shashank
>
> > Alex
> >
> >> +   /** Flags */
> >> +   __u32   flags;
> >> +};
> >> +
> >> +union drm_amdgpu_userq {
> >> +   struct drm_amdgpu_userq_in in;
> >> +   struct drm_amdgpu_userq_out out;
> >> +};
> >> +
> >>   /* vm ioctl */
> >>   #define AMDGPU_VM_OP_RESERVE_VMID  1
> >>   #define AMDGPU_VM_OP_UNRESERVE_VMID2
> >> --
> >> 2.34.1
> >>


Re: [PATCH 12/14] drm/amdgpu: initialize doorbell memory pool

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch initializes the doorbell pool with the BAR manager, which
divides all of the doorbell memory into pages.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e04409210415..95017de6b23d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1837,6 +1837,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
if (r)
return r;
  
+	/* Initialize DOORBELL pool with all of DOORBELL divided into pages */

+   r = amdgpu_bar_mgr_init(adev, AMDGPU_PL_DOORBELL);


Just replacing this with ttm_range_man_init() should be sufficient to 
not use the VRAM manager here.


Christian.
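
I.e. something along these lines in amdgpu_ttm_init(), as a sketch (the
size-in-pages argument is an assumption; a plain range manager is enough
since doorbell pages need no driver-specific bookkeeping):

	/* Initialize the DOORBELL pool with a plain TTM range manager */
	r = ttm_range_man_init(&adev->mman.bdev, AMDGPU_PL_DOORBELL, false,
			       adev->gmc.doorbell_aper_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing DOORBELL heap.\n");
		return r;
	}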


+   if (r) {
+   DRM_ERROR("Failed initializing DOORBELL heap.\n");
+   return r;
+   }
+
/*
 *The reserved vram for firmware must be pinned to the specified
 *place on the VRAM, so reserve it early.
@@ -1890,6 +1897,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
  
+	DRM_INFO("amdgpu: %uM of DOORBELL memory ready\n",

+(unsigned) (adev->gmc.doorbell_aper_size / (1024 * 1024)));
+
/* Compute GTT size, either based on 1/2 the size of RAM size
 * or whatever the user passed on module init */
if (amdgpu_gtt_size == -1) {
@@ -1991,6 +2001,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
}
  
  	amdgpu_bar_mgr_fini(adev, TTM_PL_VRAM);

+   amdgpu_bar_mgr_fini(adev, AMDGPU_PL_DOORBELL);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);




Re: [PATCH 12/13] drm/amdgpu: add domain info in bo_create_kernel_at

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Shashank Sharma 

This patch adds a domain input variable for amdgpu_bo_create_kernel_at,
so that it can be used for both VRAM and DOORBELL domain objects. It
also adds supporting code for existing callers.


We should probably drop this one as well.

We just removed the domain from the function because we only have BIOS 
reserved regions in VRAM, never anywhere else.


Allocating a doorbell for the kernel is not really a use case for
amdgpu_bo_create_kernel_at().


Christian.



Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 1 +
  4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ef1f3106bc69..dec391fa42dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -367,7 +367,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
   * 0 on success, negative error code otherwise.
   */
  int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-  uint64_t offset, uint64_t size,
+  uint64_t offset, uint64_t size, uint32_t domain,
   struct amdgpu_bo **bo_ptr, void **cpu_addr)
  {
struct ttm_operation_ctx ctx = { false, false };
@@ -378,7 +378,7 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
size = ALIGN(size, PAGE_SIZE);
  
  	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,

- AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ domain, bo_ptr, NULL,
  cpu_addr);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index bf9759758f0d..b2b7e55ac486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
  int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-  uint64_t offset, uint64_t size,
+  uint64_t offset, uint64_t size, uint32_t domain,
   struct amdgpu_bo **bo_ptr, void **cpu_addr);
  int amdgpu_bo_create_user(struct amdgpu_device *adev,
  struct amdgpu_bo_param *bp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 08355f981313..4cec90debe46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1591,6 +1591,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct 
amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
  adev->mman.fw_vram_usage_start_offset,
  adev->mman.fw_vram_usage_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
  &adev->mman.fw_vram_usage_reserved_bo,
  &adev->mman.fw_vram_usage_va);
  }
@@ -1616,6 +1617,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct 
amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
  
adev->mman.drv_vram_usage_start_offset,
  adev->mman.drv_vram_usage_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
  
&adev->mman.drv_vram_usage_reserved_bo,
  &adev->mman.drv_vram_usage_va);
  }
@@ -1696,6 +1698,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
ret = amdgpu_bo_create_kernel_at(adev,
 ctx->c2p_train_data_offset,
 ctx->train_data_size,
+AMDGPU_GEM_DOMAIN_VRAM,
 &ctx->c2p_bo,
 NULL);
if (ret) {
@@ -1709,6 +1712,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device 
*adev)
ret = amdgpu_bo_create_kernel_at(adev,
adev->gmc.real_vram_size - 
adev->mman.discovery_tmr_size,
adev->mman.discovery_tmr_size,
+   AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.discovery_memory,
NULL);
if (ret) 

Re: [PATCH 10/13] drm/amdgpu: doorbell support in get_memory functions

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch adds a section for doorbell memory to the memory status
reporting functions like vm/bo_get_memory.


Marek reworked this just recently to pass around a structure. You should 
probably rebase the code when that patch lands.


Apart from that looks good to me.

Christian.
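
For reference, that rework funnels the per-domain counters through one stats
structure instead of a growing list of output pointers, so after a rebase
the interface would presumably end up looking roughly like this (field and
struct names here are assumptions based on that rework, with the doorbell
counter this series adds):

	struct amdgpu_mem_stats {
		/* current usage, in bytes */
		uint64_t vram;
		uint64_t gtt;
		uint64_t cpu;
		/* added by this series */
		uint64_t doorbell;
	};

	void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
				  struct amdgpu_mem_stats *stats);
	void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
				  struct amdgpu_mem_stats *stats);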



Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  9 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++-
  5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 99a7855ab1bc..202df09ba5de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
  
-	uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;

+   uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0, doorbell_mem = 0;
ktime_t usage[AMDGPU_HW_IP_NUM];
uint32_t bus, dev, fn, domain;
unsigned int hw_ip;
@@ -75,7 +75,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
if (ret)
return;
  
-	amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem);

+   amdgpu_vm_get_memory(vm, &vram_mem, >t_mem, &cpu_mem, &doorbell_mem);
amdgpu_bo_unreserve(vm->root.bo);
  
  	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b2cfd46c459b..ef1f3106bc69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1288,7 +1288,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
  }
  
  void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,

-   uint64_t *gtt_mem, uint64_t *cpu_mem)
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem)
  {
unsigned int domain;
  
@@ -1300,6 +1301,9 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,

case AMDGPU_GEM_DOMAIN_GTT:
*gtt_mem += amdgpu_bo_size(bo);
break;
+   case AMDGPU_GEM_DOMAIN_DOORBELL:
+   *doorbell_mem += amdgpu_bo_size(bo);
+   break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
*cpu_mem += amdgpu_bo_size(bo);
@@ -1578,6 +1582,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m)
case AMDGPU_GEM_DOMAIN_GTT:
placement = " GTT";
break;
+   case AMDGPU_GEM_DOMAIN_DOORBELL:
+   placement = "DOOR";
+   break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
placement = " CPU";
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 082f451d26f4..bf9759758f0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -326,7 +326,8 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, 
bool intr);
  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
  u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
  void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem,
- uint64_t *gtt_mem, uint64_t *cpu_mem);
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem);
  void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
  int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
 struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc379dc22c77..1561d138945b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -918,7 +918,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  }
  
  void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,

-   uint64_t *gtt_mem, uint64_t *cpu_mem)
+ uint64_t *gtt_mem, uint64_t *cpu_mem,
+ uint64_t *doorbell_mem)
  {
struct amdgpu_bo_va *bo_va, *tmp;
  
@@ -927,37 +928,37 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,

if (!bo_va->base.bo)
continue;
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
-   gtt_mem, cpu_mem);
+   gtt_mem, cpu_mem, doorbell_mem);

Re: [PATCH 09/13] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 6:30 AM Christian König  wrote:
>
> On 03.02.23 at 20:08, Shashank Sharma wrote:
> > From: Alex Deucher 
> >
> > This patch adds changes to accommodate the new GEM/TTM domain
> > for doorbell memory.
> >
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c | 19 ++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  3 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 24 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  3 ++-
> >   7 files changed, 58 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index e3e2e6e3b485..e1c1a360614e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -974,6 +974,7 @@ struct amdgpu_device {
> >   atomic64_t vram_pin_size;
> >   atomic64_t visible_pin_size;
> >   atomic64_t gart_pin_size;
> > + atomic64_t doorbell_pin_size;
>
> Please drop that, the amount of pinned doorbells is not needed as far as
> I can see.
>
> >
> >   /* soc15 register offset based on ip, instance and  segment */
> >   uint32_t*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
> > index 0656e5bb4f05..43a3137019b1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
> > @@ -659,15 +659,17 @@ static void amdgpu_bar_mgr_del(struct 
> > ttm_resource_manager *man,
> >* @dev: the other device
> >* @dir: dma direction
> >* @sgt: resulting sg table
> > + * @mem_type: memory type
> >*
> >* Allocate and fill a sg table from a VRAM allocation.
> >*/
> >   int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
> > -   struct ttm_resource *res,
> > -   u64 offset, u64 length,
> > -   struct device *dev,
> > -   enum dma_data_direction dir,
> > -   struct sg_table **sgt)
> > +  struct ttm_resource *res,
> > +  u64 offset, u64 length,
> > +  struct device *dev,
> > +  enum dma_data_direction dir,
> > +  struct sg_table **sgt,
> > +  u32 mem_type)
>
> And again that doesn't make any sense at all.
>
> For now we don't want to export doorbells through DMA-buf.
>
> >   {
> >   struct amdgpu_res_cursor cursor;
> >   struct scatterlist *sg;
> > @@ -701,10 +703,15 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device 
> > *adev,
> >*/
> >   amdgpu_res_first(res, offset, length, &cursor);
> >   for_each_sgtable_sg((*sgt), sg, i) {
> > - phys_addr_t phys = cursor.start + adev->gmc.vram_aper_base;
> > + phys_addr_t phys = cursor.start;
> >   size_t size = cursor.size;
> >   dma_addr_t addr;
> >
> > + if (mem_type == TTM_PL_VRAM)
> > + phys += adev->gmc.vram_aper_base;
> > + else
> > + phys += adev->gmc.doorbell_aper_base;
> > +
> >   addr = dma_map_resource(dev, phys, size, dir,
> >   DMA_ATTR_SKIP_CPU_SYNC);
> >   r = dma_mapping_error(dev, addr);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > index c48ccde281c3..c645bdc49f34 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > @@ -179,9 +179,10 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
> > dma_buf_attachment *attach,
> >   break;
> >
> >   case TTM_PL_VRAM:
> > + case AMDGPU_PL_DOORBELL:
> >   r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
> >bo->tbo.base.size, attach->dev,
> > -  dir, &sgt);
> > +  dir, &sgt, 
> > bo->tbo.resource->mem_type);
> >   if (r)
> >   return ERR_PTR(r);
> >   break;
>
> That stuff can be dropped as well as far as I can see.
>
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index 887fc53a7d16..b2cfd46c459b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -147,6 +147,18 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
> > 

Re: [PATCH 09/14] drm/amdgpu: move doorbell aperture handling into ttm_init

2023-02-06 Thread Christian König

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

To consolidate it with vram handling.

Signed-off-by: Alex Deucher 
Signed-off-by: Alex Deucher 


I can't check all the implementation details, but from a mile high view 
that looks correct.


Acked-by: Christian König 

Regards,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 96 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 64 +++
  2 files changed, 71 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b07b7679bf9f..7c21ffe63ebc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1016,82 +1016,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev)
return pci_reset_function(adev->pdev);
  }
  
-/*

- * GPU doorbell aperture helpers function.
- */
-/**
- * amdgpu_device_doorbell_init - Init doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Init doorbell driver information (CIK)
- * Returns 0 on success, error on failure.
- */
-static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
-{
-
-   /* No doorbell on SI hardware generation */
-   if (adev->asic_type < CHIP_BONAIRE) {
-   adev->gmc.doorbell_aper_base = 0;
-   adev->gmc.doorbell_aper_size = 0;
-   adev->doorbell.num_doorbells = 0;
-   adev->mman.doorbell_aper_base_kaddr = NULL;
-   return 0;
-   }
-
-   if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
-   return -EINVAL;
-
-   amdgpu_asic_init_doorbell_index(adev);
-
-   /* doorbell bar mapping */
-   adev->gmc.doorbell_aper_base = pci_resource_start(adev->pdev, 2);
-   adev->gmc.doorbell_aper_size = pci_resource_len(adev->pdev, 2);
-
-   if (adev->enable_mes) {
-   adev->doorbell.num_doorbells =
-   adev->gmc.doorbell_aper_size / sizeof(u32);
-   } else {
-   adev->doorbell.num_doorbells =
-   min_t(u32, adev->gmc.doorbell_aper_size / sizeof(u32),
- adev->doorbell_index.max_assignment+1);
-   if (adev->doorbell.num_doorbells == 0)
-   return -EINVAL;
-
-   /* For Vega, reserve and map two pages on doorbell BAR since 
SDMA
-* paging queue doorbell use the second page. The
-* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
-* doorbells are in the first page. So with paging queue 
enabled,
-* the max num_doorbells should + 1 page (0x400 in dword)
-*/
-   if (adev->asic_type >= CHIP_VEGA10)
-   adev->doorbell.num_doorbells += 0x400;
-   }
-
-   adev->mman.doorbell_aper_base_kaddr = 
ioremap(adev->gmc.doorbell_aper_base,
- 
adev->doorbell.num_doorbells *
- sizeof(u32));
-   if (adev->mman.doorbell_aper_base_kaddr == NULL)
-   return -ENOMEM;
-
-   return 0;
-}
-
-/**
- * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down doorbell driver information (CIK)
- */
-static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
-{
-   iounmap(adev->mman.doorbell_aper_base_kaddr);
-   adev->mman.doorbell_aper_base_kaddr = NULL;
-}
-
-
-
  /*
   * amdgpu_device_wb_*()
   * Writeback is the method by which the GPU updates special pages in memory
@@ -1239,7 +1163,6 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device 
*adev)
  cmd & ~PCI_COMMAND_MEMORY);
  
  	/* Free the VRAM and doorbell BAR, we most likely need to move both. */

-   amdgpu_device_doorbell_fini(adev);
if (adev->asic_type >= CHIP_BONAIRE)
pci_release_resource(adev->pdev, 2);
  
@@ -1253,11 +1176,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
  
  	pci_assign_unassigned_bus_resources(adev->pdev->bus);
  
-	/* When the doorbell or fb BAR isn't available we have no chance of

-* using the device.
-*/
-   r = amdgpu_device_doorbell_init(adev);
-   if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
+   if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
+   return -ENODEV;
+
+   if (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)
return -ENODEV;
  
  	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

@@ -3711,9 +3633,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!adev->have_atomics_support)
dev_info(adev->dev, "PCIE atomic ops is not supported\n");
  
-	/* doorbell bar mapping and doorbell index init*/

-   amdgpu_device_doorbell_init(adev);
-

Re: [PATCH 02/13] drm/amdgpu: rename vram_mgr functions to bar_mgr

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 6:21 AM Christian König  wrote:
>
> On 03.02.23 at 20:08, Shashank Sharma wrote:
> > From: Alex Deucher 
> >
> > Rename the VRAM manager functions so they can be reused to manage
> > doorbell BAR as well.
>
> Yeah, as said before Alex did this before we had the buddy allocator.
>
> This doesn't make sense any more and should probably be dropped completely.

I had suggested that we just use this as is for now so we could get
everything up and running to prove this out.  Now that we have this
actually working, we can look at dropping this.

Alex


>
> Christian.
>
> >
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|   2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |   8 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|   4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |   4 +-
> >   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h|  14 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  36 ++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |   2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 200 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  18 +-
> >   12 files changed, 157 insertions(+), 143 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 7b5ce00f0602..e34eae8d64cf 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -723,7 +723,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct 
> > amdgpu_device *adev,
> >   if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
> >   u64 total_vis_vram = adev->gmc.visible_vram_size;
> >   u64 used_vis_vram =
> > -   amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
> > +   amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
> >
> >   if (used_vis_vram < total_vis_vram) {
> >   u64 free_vis_vram = total_vis_vram - used_vis_vram;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > index 271e30e34d93..c48ccde281c3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > @@ -179,9 +179,9 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
> > dma_buf_attachment *attach,
> >   break;
> >
> >   case TTM_PL_VRAM:
> > - r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
> > -   bo->tbo.base.size, attach->dev,
> > -   dir, &sgt);
> > + r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
> > +  bo->tbo.base.size, attach->dev,
> > +  dir, &sgt);
> >   if (r)
> >   return ERR_PTR(r);
> >   break;
> > @@ -215,7 +215,7 @@ static void amdgpu_dma_buf_unmap(struct 
> > dma_buf_attachment *attach,
> >   sg_free_table(sgt);
> >   kfree(sgt);
> >   } else {
> > - amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
> > + amdgpu_bar_mgr_free_sgt(attach->dev, dir, sgt);
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index cd4caaa29528..9f148ea7ca66 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -2807,12 +2807,12 @@ static struct pci_error_handlers 
> > amdgpu_pci_err_handler = {
> >   .resume = amdgpu_pci_resume,
> >   };
> >
> > -extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> > +extern const struct attribute_group amdgpu_bar_mgr_attr_group;
> >   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> >   extern const struct attribute_group amdgpu_vbios_version_attr_group;
> >
> >   static const struct attribute_group *amdgpu_sysfs_groups[] = {
> > - &amdgpu_vram_mgr_attr_group,
> > + &amdgpu_bar_mgr_attr_group,
> >   &amdgpu_gtt_mgr_attr_group,
> >   &amdgpu_vbios_version_attr_group,
> >   NULL,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 7aa7e52ca784..2ce11434fb22 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -650,7 +650,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> > *data, struct drm_file *filp)
> >   ui64 = 
> > ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
> >   return copy

Re: [PATCH 0/8] AMDGPU usermode queues

2023-02-06 Thread Alex Deucher
On Mon, Feb 6, 2023 at 10:39 AM Michel Dänzer
 wrote:
>
> On 2/3/23 22:54, Shashank Sharma wrote:
> > From: Shashank Sharma 
> >
> > This patch series introduces AMDGPU usermode graphics queues.
> > User queues are a method of GPU workload submission into the graphics
> > hardware without any interaction with kernel/DRM schedulers. In this
> > method, a userspace graphics application can create its own workqueue
> > and submit it directly in the GPU HW.
> >
> > The general idea of how this is supposed to work:
> > - The application creates the following GPU objects:
> >   - A queue object to hold the workload packets.
> >   - A read pointer object.
> >   - A write pointer object.
> >   - A doorbell page.
> > - Kernel picks any 32-bit offset in the doorbell page for this queue.
> > - The application uses the usermode_queue_create IOCTL introduced in
> >   this patch, by passing the GPU addresses of these objects (read
> >   ptr, write ptr, queue base address and doorbell address)
> > - The kernel creates the queue and maps it in the HW.
> > - The application can start submitting the data in the queue as soon as
> >   the kernel IOCTL returns.
> > - Once the data is filled in the queue, the app must write the number of
> >   dwords in the doorbell offset, and the GPU will start fetching the data.
> >
> > libDRM changes for this series and a sample DRM test program can be found
> > in the MESA merge request here:
> > https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/287
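For readers skimming the archive, the workflow quoted above reduces to one ioctl plus a doorbell write. Below is a minimal C sketch of that sequence; the struct layout, ioctl request and field names are placeholders standing in for the series' real uAPI (and the libdrm wrappers in the MR linked above), not the actual definitions:

    #include <stdint.h>
    #include <sys/ioctl.h>

    /* Placeholder request layout -- the real names come from the series'
     * amdgpu_drm.h; treat everything here as illustrative only. */
    struct userq_create {
            uint64_t queue_va;        /* GPU VA of the ring buffer          */
            uint64_t rptr_va;         /* GPU VA of the read pointer object  */
            uint64_t wptr_va;         /* GPU VA of the write pointer object */
            uint64_t doorbell_handle; /* GEM handle of the doorbell page    */
            uint32_t queue_size;      /* ring size in bytes                 */
            uint32_t queue_id;        /* filled in by the kernel on return  */
    };

    #define USERQ_CREATE_IOCTL 0      /* stand-in for the real DRM ioctl nr */

    static int run_user_queue(int drm_fd, struct userq_create *req,
                              volatile uint32_t *doorbell, uint32_t ndw)
    {
            /* 1. Kernel creates the queue and maps it in HW; queue_id comes back. */
            if (ioctl(drm_fd, USERQ_CREATE_IOCTL, req))
                    return -1;

            /* 2. Userspace fills the ring buffer with packets (omitted).          */

            /* 3. Ring the doorbell: write the number of valid dwords and the GPU
             *    starts fetching, with no further kernel involvement.             */
            *doorbell = ndw;
            return 0;
    }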
>
> I hope everyone's clear these libdrm_amdgpu changes won't be sufficient uAPI 
> validation to allow the kernel bits to be merged upstream.

Right, this is just what we have been using to bring up the feature so far.

Alex

>
> This will require an implementation in the Mesa radeonsi / RADV driver, 
> ideally with working implicit synchronization for BOs shared via dma-buf.
>
>
> --
> Earthling Michel Dänzer|  https://redhat.com
> Libre software enthusiast  | Mesa and Xwayland developer
>


Re: [PATCH] drm/amdgpu: Fix potential race processing vm->freed

2023-02-06 Thread Christian König

On 06.02.23 at 16:52, Rob Clark wrote:

On Mon, Feb 6, 2023 at 2:15 AM Christian König  wrote:

On 03.02.23 at 19:10, Rob Clark wrote:

From: Rob Clark 

If userspace calls the AMDGPU_CS ioctl from multiple threads, because
the vm is global to the drm_file, you can end up with multiple threads
racing in amdgpu_vm_clear_freed().  So the freed list should be
protected with the status_lock, similar to other vm lists.

Well this is nonsense. To process the freed list the VM root PD lock
must be held anyway.

If we have a call path where this isn't true then we have a major bug at
a different place here.

I'm not super familiar w/ the amdgpu cs parser stuff, but the only
thing that I'm seeing that protects things is the bo_list_mutex and it
isn't clear to me that this is 1:1 with the vm (it looks like it is
not).


Do you have a backtrace?

Take a look at the reservation object of vm->root.bo. This should always 
be locked first before doing *anything* in a CS.


If that isn't the case we have a much worse problem.
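One way to make that expectation explicit in code is a lockdep assertion at the top of amdgpu_vm_clear_freed(); a minimal sketch, assuming the generic dma-resv helper (whether amdgpu actually adds such an assertion is a separate question):

    /* Every caller, the CS path included, must already hold the root PD
     * reservation before touching the freed list. */
    dma_resv_assert_held(vm->root.bo->tbo.base.resv);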


(I cc'd you on the bug report, jfyi)


I unfortunately only get a permission denied when I try to access that one.

Regards,
Christian.



BR,
-R


Regards,
Christian.


Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
Signed-off-by: Rob Clark 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++
   1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b9441ab457ea..aeed7bc1512f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1240,10 +1240,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
   struct amdgpu_bo_va_mapping *mapping;
   uint64_t init_pte_value = 0;
   struct dma_fence *f = NULL;
+ struct list_head freed;
   int r;

- while (!list_empty(&vm->freed)) {
- mapping = list_first_entry(&vm->freed,
+ /*
+  * Move the contents of the VM's freed list to a local list
+  * that we can iterate without racing against other threads:
+  */
+ spin_lock(&vm->status_lock);
+ list_replace_init(&vm->freed, &freed);
+ spin_unlock(&vm->status_lock);
+
+ while (!list_empty(&freed)) {
+ mapping = list_first_entry(&freed,
   struct amdgpu_bo_va_mapping, list);
   list_del(&mapping->list);

@@ -1258,6 +1267,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
   amdgpu_vm_free_mapping(adev, vm, mapping, f);
   if (r) {
   dma_fence_put(f);
+
+ /*
+  * Move any unprocessed mappings back to the freed
+  * list:
+  */
+ spin_lock(&vm->status_lock);
+ list_splice_tail(&freed, &vm->freed);
+ spin_unlock(&vm->status_lock);
+
   return r;
   }
   }
@@ -1583,11 +1601,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
   mapping->bo_va = NULL;
   trace_amdgpu_vm_bo_unmap(bo_va, mapping);

- if (valid)
+ if (valid) {
+ spin_lock(&vm->status_lock);
   list_add(&mapping->list, &vm->freed);
- else
+ spin_unlock(&vm->status_lock);
+ } else {
   amdgpu_vm_free_mapping(adev, vm, mapping,
  bo_va->last_pt_update);
+ }

   return 0;
   }
@@ -1671,7 +1692,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
   tmp->last = eaddr;

   tmp->bo_va = NULL;
+ spin_lock(&vm->status_lock);
   list_add(&tmp->list, &vm->freed);
+ spin_unlock(&vm->status_lock);
   trace_amdgpu_vm_bo_unmap(NULL, tmp);
   }

@@ -1788,7 +1811,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
   amdgpu_vm_it_remove(mapping, &vm->va);
   mapping->bo_va = NULL;
   trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ spin_lock(&vm->status_lock);
   list_add(&mapping->list, &vm->freed);
+ spin_unlock(&vm->status_lock);
   }
   list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
   list_del(&mapping->list);




Re: [PATCH 02/13] drm/amdgpu: rename vram_mgr functions to bar_mgr

2023-02-06 Thread Christian König

On 06.02.23 at 16:34, Shashank Sharma wrote:


On 06/02/2023 12:20, Christian König wrote:

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

Rename the VRAM manager functions so they can be reused to manage
doorbell BAR as well.


Yeah, as said before Alex did this before we had the buddy allocator.

This doesn't make sense any more and should probably be dropped 
completely.


Christian.



Yeah, I was wondering: if we just don't use the buddy allocation for 
doorbell, but still separate out the VRAM vs DB functions, would that 
make better sense?


Nope. One of the main reasons we have a separate buddy based manager for 
VRAM is that we have CPU visible vs invisible areas.


For the doorbell we simply don't have any of that, so the additional 
complexity is just superfluous and will sooner or later cause trouble.


Christian.



- Shashank






Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |   4 +-
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  36 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 200 
++

  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  18 +-
  12 files changed, 157 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 7b5ce00f0602..e34eae8d64cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -723,7 +723,7 @@ static void 
amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,

  if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
  u64 total_vis_vram = adev->gmc.visible_vram_size;
  u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
    if (used_vis_vram < total_vis_vram) {
  u64 free_vis_vram = total_vis_vram - used_vis_vram;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index 271e30e34d93..c48ccde281c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -179,9 +179,9 @@ static struct sg_table 
*amdgpu_dma_buf_map(struct dma_buf_attachment *attach,

  break;
    case TTM_PL_VRAM:
-    r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
-  bo->tbo.base.size, attach->dev,
-  dir, &sgt);
+    r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
+ dir, &sgt);
  if (r)
  return ERR_PTR(r);
  break;
@@ -215,7 +215,7 @@ static void amdgpu_dma_buf_unmap(struct 
dma_buf_attachment *attach,

  sg_free_table(sgt);
  kfree(sgt);
  } else {
-    amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+    amdgpu_bar_mgr_free_sgt(attach->dev, dir, sgt);
  }
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index cd4caaa29528..9f148ea7ca66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2807,12 +2807,12 @@ static struct pci_error_handlers 
amdgpu_pci_err_handler = {

  .resume    = amdgpu_pci_resume,
  };
  -extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_bar_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_vbios_version_attr_group;
    static const struct attribute_group *amdgpu_sysfs_groups[] = {
-    &amdgpu_vram_mgr_attr_group,
+    &amdgpu_bar_mgr_attr_group,
  &amdgpu_gtt_mgr_attr_group,
  &amdgpu_vbios_version_attr_group,
  NULL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 7aa7e52ca784..2ce11434fb22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -650,7 +650,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)
  ui64 = 
ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);

  return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
  case AMDGPU_INFO_VIS_VRAM_USAGE:
-    ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+    ui64 = amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
  return copy_to_

Re: [PATCH] drm/amdgpu: Fix potential race processing vm->freed

2023-02-06 Thread Rob Clark
On Mon, Feb 6, 2023 at 2:15 AM Christian König  wrote:
>
> On 03.02.23 at 19:10, Rob Clark wrote:
> > From: Rob Clark 
> >
> > If userspace calls the AMDGPU_CS ioctl from multiple threads, because
> > the vm is global to the drm_file, you can end up with multiple threads
> > racing in amdgpu_vm_clear_freed().  So the freed list should be
> > protected with the status_lock, similar to other vm lists.
>
> Well this is nonsense. To process the freed list the VM root PD lock
> must be held anyway.
>
> If we have a call path where this isn't true then we have a major bug at
> a different place here.

I'm not super familiar w/ the amdgpu cs parser stuff, but the only
thing that I'm seeing that protects things is the bo_list_mutex and it
isn't clear to me that this is 1:1 with the vm (it looks like it is
not).

(I cc'd you on the bug report, jfyi)

BR,
-R

>
> Regards,
> Christian.
>
> >
> > Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
> > Signed-off-by: Rob Clark 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++
> >   1 file changed, 29 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index b9441ab457ea..aeed7bc1512f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -1240,10 +1240,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device 
> > *adev,
> >   struct amdgpu_bo_va_mapping *mapping;
> >   uint64_t init_pte_value = 0;
> >   struct dma_fence *f = NULL;
> > + struct list_head freed;
> >   int r;
> >
> > - while (!list_empty(&vm->freed)) {
> > - mapping = list_first_entry(&vm->freed,
> > + /*
> > +  * Move the contents of the VM's freed list to a local list
> > +  * that we can iterate without racing against other threads:
> > +  */
> > + spin_lock(&vm->status_lock);
> > + list_replace_init(&vm->freed, &freed);
> > + spin_unlock(&vm->status_lock);
> > +
> > + while (!list_empty(&freed)) {
> > + mapping = list_first_entry(&freed,
> >   struct amdgpu_bo_va_mapping, list);
> >   list_del(&mapping->list);
> >
> > @@ -1258,6 +1267,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> >   amdgpu_vm_free_mapping(adev, vm, mapping, f);
> >   if (r) {
> >   dma_fence_put(f);
> > +
> > + /*
> > +  * Move any unprocessed mappings back to the freed
> > +  * list:
> > +  */
> > + spin_lock(&vm->status_lock);
> > + list_splice_tail(&freed, &vm->freed);
> > + spin_unlock(&vm->status_lock);
> > +
> >   return r;
> >   }
> >   }
> > @@ -1583,11 +1601,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> >   mapping->bo_va = NULL;
> >   trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> >
> > - if (valid)
> > + if (valid) {
> > + spin_lock(&vm->status_lock);
> >   list_add(&mapping->list, &vm->freed);
> > - else
> > + spin_unlock(&vm->status_lock);
> > + } else {
> >   amdgpu_vm_free_mapping(adev, vm, mapping,
> >  bo_va->last_pt_update);
> > + }
> >
> >   return 0;
> >   }
> > @@ -1671,7 +1692,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
> > *adev,
> >   tmp->last = eaddr;
> >
> >   tmp->bo_va = NULL;
> > + spin_lock(&vm->status_lock);
> >   list_add(&tmp->list, &vm->freed);
> > + spin_unlock(&vm->status_lock);
> >   trace_amdgpu_vm_bo_unmap(NULL, tmp);
> >   }
> >
> > @@ -1788,7 +1811,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> >   amdgpu_vm_it_remove(mapping, &vm->va);
> >   mapping->bo_va = NULL;
> >   trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> > + spin_lock(&vm->status_lock);
> >   list_add(&mapping->list, &vm->freed);
> > + spin_unlock(&vm->status_lock);
> >   }
> >   list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
> >   list_del(&mapping->list);
>


Re: [PATCH 0/8] AMDGPU usermode queues

2023-02-06 Thread Michel Dänzer
On 2/3/23 22:54, Shashank Sharma wrote:
> From: Shashank Sharma 
> 
> This patch series introduces AMDGPU usermode graphics queues.
> User queues are a method of GPU workload submission into the graphics
> hardware without any interaction with kernel/DRM schedulers. In this
> method, a userspace graphics application can create its own workqueue
> and submit it directly in the GPU HW.
> 
> The general idea of how this is supposed to work:
> - The application creates the following GPU objects:
>   - A queue object to hold the workload packets.
>   - A read pointer object.
>   - A write pointer object.
>   - A doorbell page.
> - Kernel picks any 32-bit offset in the doorbell page for this queue.
> - The application uses the usermode_queue_create IOCTL introduced in
>   this patch, by passing the GPU addresses of these objects (read
>   ptr, write ptr, queue base address and doorbell address)
> - The kernel creates the queue and maps it in the HW.
> - The application can start submitting the data in the queue as soon as
>   the kernel IOCTL returns.
> - Once the data is filled in the queue, the app must write the number of
>   dwords in the doorbell offset, and the GPU will start fetching the data.
> 
> libDRM changes for this series and a sample DRM test program can be found
> in the MESA merge request here:
> https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/287

I hope everyone's clear these libdrm_amdgpu changes won't be sufficient uAPI 
validation to allow the kernel bits to be merged upstream.

This will require an implementation in the Mesa radeonsi / RADV driver, ideally 
with working implicit synchronization for BOs shared via dma-buf.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH 02/13] drm/amdgpu: rename vram_mgr functions to bar_mgr

2023-02-06 Thread Shashank Sharma



On 06/02/2023 12:20, Christian König wrote:

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

Rename the VRAM manager functions so they can be reused to manage
doorbell BAR as well.


Yeah, as said before Alex did this before we had the buddy allocator.

This doesn't make sense any more and should probably be dropped 
completely.


Christian.



Yeah, I was wondering: if we just don't use the buddy allocation for 
doorbell, but still separate out the VRAM vs DB functions, would that 
make better sense?


- Shashank






Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |   4 +-
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  36 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 200 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  18 +-
  12 files changed, 157 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 7b5ce00f0602..e34eae8d64cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -723,7 +723,7 @@ static void 
amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,

  if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
  u64 total_vis_vram = adev->gmc.visible_vram_size;
  u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
    if (used_vis_vram < total_vis_vram) {
  u64 free_vis_vram = total_vis_vram - used_vis_vram;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index 271e30e34d93..c48ccde281c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -179,9 +179,9 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
dma_buf_attachment *attach,

  break;
    case TTM_PL_VRAM:
-    r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
-  bo->tbo.base.size, attach->dev,
-  dir, &sgt);
+    r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
+ dir, &sgt);
  if (r)
  return ERR_PTR(r);
  break;
@@ -215,7 +215,7 @@ static void amdgpu_dma_buf_unmap(struct 
dma_buf_attachment *attach,

  sg_free_table(sgt);
  kfree(sgt);
  } else {
-    amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+    amdgpu_bar_mgr_free_sgt(attach->dev, dir, sgt);
  }
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index cd4caaa29528..9f148ea7ca66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2807,12 +2807,12 @@ static struct pci_error_handlers 
amdgpu_pci_err_handler = {

  .resume    = amdgpu_pci_resume,
  };
  -extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_bar_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_vbios_version_attr_group;
    static const struct attribute_group *amdgpu_sysfs_groups[] = {
-    &amdgpu_vram_mgr_attr_group,
+    &amdgpu_bar_mgr_attr_group,
  &amdgpu_gtt_mgr_attr_group,
  &amdgpu_vbios_version_attr_group,
  NULL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 7aa7e52ca784..2ce11434fb22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -650,7 +650,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)
  ui64 = 
ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);

  return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
  case AMDGPU_INFO_VIS_VRAM_USAGE:
-    ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+    ui64 = amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
  return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
  case AMDGPU_INFO_GTT_USAGE:
  ui64 = 
ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
@@ -704,7 +704,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, 
void *data, struct drm_file *filp)

  atomic64_read(&adev->visible_pin_size),
 

Re: [PATCH 01/13] drm/amdgpu: add UAPI for allocating doorbell memory

2023-02-06 Thread Shashank Sharma

Hey Christian,

On 06/02/2023 12:19, Christian König wrote:

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

Signed-off-by: Alex Deucher 
---
  include/uapi/drm/amdgpu_drm.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index 4038abe8505a..fd1c65d78ded 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
   *
   * %AMDGPU_GEM_DOMAIN_OA    Ordered append, used by 3D or Compute 
engines

   * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL    Doorbell memory. It is an MMIO 
region for

+ * signalling user mode queues.


Drop "memory" from the sentence. The key point of doorbells is that 
they are *not* memory :)


Apart from that looks good to me,
Christian.



Noted.

- Shashank




   */
  #define AMDGPU_GEM_DOMAIN_CPU    0x1
  #define AMDGPU_GEM_DOMAIN_GTT    0x2
@@ -101,12 +104,14 @@ extern "C" {
  #define AMDGPU_GEM_DOMAIN_GDS    0x8
  #define AMDGPU_GEM_DOMAIN_GWS    0x10
  #define AMDGPU_GEM_DOMAIN_OA    0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL    0x40
  #define AMDGPU_GEM_DOMAIN_MASK    (AMDGPU_GEM_DOMAIN_CPU | \
   AMDGPU_GEM_DOMAIN_GTT | \
   AMDGPU_GEM_DOMAIN_VRAM | \
   AMDGPU_GEM_DOMAIN_GDS | \
   AMDGPU_GEM_DOMAIN_GWS | \
- AMDGPU_GEM_DOMAIN_OA)
+ AMDGPU_GEM_DOMAIN_OA | \
+ AMDGPU_GEM_DOMAIN_DOORBELL)
    /* Flag that CPU access will be required for the case of VRAM 
domain */

  #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED    (1 << 0)




[PATCH] drm/amd/amdgpu: add complete header search path

2023-02-06 Thread Randy Dunlap
The path for the "mod_info_packet.h" header file is
incomplete, so add its location to the header search path
in the amdgpu Makefile.

Seen on ARCH=alpha (275 times in one build).

In file included from ../drivers/gpu/drm/amd/amdgpu/amdgpu.h:90,
 from ../drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:43:
../drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.h:62:10: fatal 
error: mod_info_packet.h: No such file or directory
   62 | #include "mod_info_packet.h"
  |  ^~~
compilation terminated.

Fixes: 5b49da02ddbe ("drm/amd/display: Enable Freesync over PCon")
Signed-off-by: Randy Dunlap 
Cc: Signed-off-by: Sung Joon Kim 
Cc: Alex Deucher 
Cc: Christian König 
Cc: "Pan, Xinhui" 
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
---
 drivers/gpu/drm/amd/amdgpu/Makefile |1 +
 1 file changed, 1 insertion(+)

diff -- a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -34,6 +34,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/
-I$(FULL_AMD_PATH)/acp/include \
-I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \
+   -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd


[bug report] drm/amd/display: Disable MALL SS and messages for PSR supported configs

2023-02-06 Thread Dan Carpenter
Hello Dillon Varone,

This is a semi-automatic email about new static checker warnings.

The patch 0bed85e48af1: "drm/amd/display: Disable MALL SS and
messages for PSR supported configs" from Jan 4, 2023, leads to the
following Smatch complaint:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hwseq.c:257 
dcn32_apply_idle_power_optimizations()
warn: variable dereferenced before check 'dc->current_state' (see line 249)

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_hwseq.c
   248  
   249  for (i = 0; i < dc->current_state->stream_count; i++) {
^^^
Patch adds unchecked dereference

   250  /* MALL SS messaging is not supported with PSR at this 
time */
   251  if (dc->current_state->streams[i] != NULL &&
   252  
dc->current_state->streams[i]->link->psr_settings.psr_version != 
DC_PSR_VERSION_UNSUPPORTED)
   253  return false;
   254  }
   255  
   256  if (enable) {
   257  if (dc->current_state) {

Checked too late.

   258  
   259  /* 1. Check no memory request case for CAB.

regards,
dan carpenter
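One possible shape of a fix is simply hoisting the NULL check above the loop the patch added; the sketch below is based only on the lines quoted in the report, and the fix the display team actually chooses may differ:

    /* Bail out while dc->current_state may still be NULL, instead of
     * checking it only later under the "if (enable)" branch. */
    if (!dc->current_state)
            return false;

    for (i = 0; i < dc->current_state->stream_count; i++) {
            /* MALL SS messaging is not supported with PSR at this time */
            if (dc->current_state->streams[i] &&
                dc->current_state->streams[i]->link->psr_settings.psr_version !=
                            DC_PSR_VERSION_UNSUPPORTED)
                    return false;
    }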


gpu_metrics does not provide 'current_gfxclk', 'current_uclk', 'average_cpu_power' & 'temperature_core' on AMD Ryzen 7000 CPU

2023-02-06 Thread sfrcorne
Hello,

I hope this is the correct place to ask my question. I was not sure whether I should 
have opened a new issue on GitLab or sent an email here, since I don't know 
whether this is a bug or intended behaviour.

The question is about the new AMD Ryzen 7000 CPUs. These new CPUs have an 
iGPU and consequently provide a gpu_metrics file for monitoring the GPU/CPU 
(APU?). This file is used by programs like Mangohud, which try to read (among 
other values) the following 4 values:
- current_gfxclk
- current_uclk
- average_cpu_power
- temperature_core

However, it appears that on AMD Ryzen 7000 CPUs these 4 values are not 
provided/updated in the gpu_metrics file. Other values like 
'average_core_power', 'temperature_l3' and the other 'current_clk' are also 
not provided/updated but these are not used by Mangohud at the moment.

Is this intentional or a bug? And will this be fixed, and/or will support for 
these 4 values be added in the future?

Kind Regards,
sfrcorne
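A quick way to check which metrics table revision a given APU reports (and therefore which fields the driver can populate at all) is to dump the gpu_metrics header from sysfs. A minimal sketch; the three header fields mirror the driver's metrics_table_header, and the card index in the path may differ on your system:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the driver's metrics_table_header: size plus format/content revision. */
    struct metrics_header {
            uint16_t structure_size;
            uint8_t  format_revision;
            uint8_t  content_revision;
    };

    int main(void)
    {
            struct metrics_header hdr;
            FILE *f = fopen("/sys/class/drm/card0/device/gpu_metrics", "rb");

            if (!f || fread(&hdr, sizeof(hdr), 1, f) != 1) {
                    perror("gpu_metrics");
                    return 1;
            }
            printf("size=%u format=v%u content=v%u\n",
                   hdr.structure_size, hdr.format_revision, hdr.content_revision);
            fclose(f);
            return 0;
    }

Which gpu_metrics_vX_Y structure those revisions select, and which of its fields the SMU firmware fills in, is exactly the question raised above.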

Re: Patched macOS kexts start Raven iGPU, but GPUVM page fault occurs on the first GFX and SDMA IB submitted by WindowServer. Help?

2023-02-06 Thread Visual (VisualDevelopment)
While simply waiting for a reply to the email was an attractive option, we 
chose to investigate other parts of the code during the last three days.

More precisely, we investigated _mmhub_1_0_update_medium_grain_clock_gating and 
seemed to have discovered a register offset mismatch. However, we soon found 
that the HWIP discovery code automatically applies Raven-specific fixes to the 
offset. Therefore, the behaviour is correct in the end.
We also thought of running the WhateverRed kext on a Vega machine to see how 
the kext behaves differently on a dGPU with similar architecture. The problem 
is that we need a tester who owns a Vega card to do that. Nyan Cat sent a 
message to seek assistance on the AMD OS X server but got no reply. In the 
meantime, Visual asked if his friend, who owns a Vega card, was willing to 
help. However, he also has received no response to date.

We realise that we started the thread at an odd time (Saturday) and with an 
unformatted subject, not to mention a few typos we noticed right after sending 
the email. We hope that these flaws won't dampen your interest in the project.

> On 4 Feb 2023, at 13:24, Visual (VisualDevelopment) 
>  wrote:
> 
> Table of Contents:
> 1. Introduction
> 2. History of WhateverRed
>2.1. Wrapping/Redirecting kext logic with Lilu
>2.2. VTables and our Reverse Engineering
>2.3. Debugging with a black screen
>2.4. Firmware injection and other HWLibs troubles
>2.5. AMDRadeonX5000 Video Decoding/Encoding and SDMA engine mismatches
>2.6. SDMA0 power on via SMC
>2.7. SDMA0 Accel channel skipping memory mapping commands
> 3. Current issue
>3.1. VM Protection Faults
>3.2. Analysis of the diagnostic dump
>3.3. A deeper dive into the protection fault
> 4. What we know so far
>4.1. The VM Blocks and the PDEs/PTEs
>4.2. The VM registers
>4.3. The PDE/PTE flags
>4.4. The translate_further mode
>4.5. The VMPTConfig in AMD kexts
>4.6. How the entryCount is determined on AMDGPU
>4.7. The GPUVM settings on AMDRadeonX5000 vs. AMDGPU
> 5. What we have tried
>5.1. PTE/PDE flags experimentations
>5.2. Experimentation with VMPTConfig and related settings
> 6. How you can help
>6.1. Unanswered questions
>6.2. Ways to contact us
> 
> 
> -- 1. Introduction --
> Hello everyone.
> We are a small team of 3 people trying to get Hackintoshes (PCs running 
> macOS) with AMD (Vega) iGPUs (specifically Raven/Raven2/Renoir and their 
> derivatives, such as Picasso) to have graphics acceleration on AMD laptops.
> To be precise, we are fixing broken and/or missing logic via patching the 
> existing kexts (currently AMDRadeonX5000 for GCN 5 (GFX 9) and AMDRadeonX6000 
> for VCN (GFX 10), AMDRadeonX6000Framebuffer for DCN instead of 
> AMD1Controller since it is DCE).
> 
> The team members are:
> - Visual, the Project Owner, is a Greek 17 year old CS student with extensive 
> knowledge on Operating System development. He writes most of the kext code 
> and provides insight on OS and Driver behaviour when possible.
> - NyanCatTW1, the Automation Engineer, is a 17-year-old student who lives in 
> Taiwan. The NYCU CSIE admitted him last year. He also does most of the 
> Reverse Engineering.
> - Allen Chen, the tester with a Renoir laptop, perseverance and some ideas; 
> helps with the effort occasionally, currently striving to become NyanCatTW1's 
> classmate again, as they were six years ago
> 
> Our kext, WhateverRed has successfully gotten the aforesaid kexts to 
> deterministically power up and start the IPs/MEs in the GPU, such as GFX and 
> SDMA. Attached are partial highlights of a dmesg log from the main testing 
> system:
> 
>[   27.351538]: netdbg: Disabled via boot arg
>[   27.351543]: rad: patching device type table
>[   27.351558]: rad: Automagically getting VBIOS from VFCT table
>...
>[   27.505319]: [3:0:0] [Accel] >>> Calling TTL::initialize()
>[   27.505331]: [AMD INFO] TTL Interface: Boot mode Normal.
>...
>[   27.649777]: [3:0:0] [Accel] <<< TTL::initialize() Completed 
> successfully.
>...
>[   27.662027]: Accelerator successfully registered with controller.
>...
>[   29.346963]: rad: _SmuRaven_Initialize returned 0x1
>[   29.346967]: rad: Sending PPSMC_MSG_PowerUpSdma (0xE) to the SMC
>[   29.347052]: rad: _Raven_SendMsgToSmcWithParameter returned 0x1
>...
>[   29.365343]: rad: powerUpHW: this = 0xff935ca3d000
>[   29.377219]: rad: powerUpHW returned 1
>[   29.377228]: [3:0:0]: Controller is enabled, finish initialization
>[   29.424252]: Adding AGDP mode validate property
>[   29.425160]: kPEDisableScreen 1
>[   29.425685]: [3:0:0] [FB:0] AmdRadeonFramebuffer::setCursorImage() !!! 
> Driver is offline.
>[   29.425695]: [3:0:0] [FB:1] AmdRadeonFramebuffer::setCursorImage() !!! 
> Driver is offline.
> 
> 
> The project is hosted on GitHub (https://github.com/NootInc/WhateverRed) with 
> 135 s

RE: [PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x

2023-02-06 Thread Liu, Aaron

Reviewed-by: Aaron Liu 

> -Original Message-
> From: Ji, Ruili 
> Sent: Monday, February 6, 2023 8:58 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Kuehling, Felix
> ; Liu, Aaron ; Zhang, Yifan
> ; Ji, Ruili 
> Subject: [PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x
>
> From: Ruili Ji 
>
> For the MQD memory, KMD would always allocate 4K memory, and the MES
> scheduler would write to the end of the MQD for the unmap flag.
>
> Signed-off-by: Ruili Ji 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20
> +++  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> | 12 +--
>  2 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index c06ada0844ba..d682e6921438 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -2244,10 +2244,22 @@ static int allocate_hiq_sdma_mqd(struct
> device_queue_manager *dqm)
>   int retval;
>   struct kfd_dev *dev = dqm->dev;
>   struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
> - uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]-
> >mqd_size *
> - get_num_all_sdma_engines(dqm) *
> - dev->device_info.num_sdma_queues_per_engine +
> - dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> + uint32_t size;
> + /*
> +  * MES writes to areas beyond the MQD size. So allocate
> +  * 1 PAGE_SIZE of memory for the MQD if MES is enabled.
> +  */
> + if (dev->shared_resources.enable_mes) {
> + size = PAGE_SIZE *
> + get_num_all_sdma_engines(dqm) *
> + dev->device_info.num_sdma_queues_per_engine +
> + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> + } else {
> + size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size
> *
> + get_num_all_sdma_engines(dqm) *
> + dev->device_info.num_sdma_queues_per_engine +
> + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
> + }
>
>   retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
>   &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index 623ccd227b7d..ea176a515898 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -66,15 +66,23 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct
> kfd_dev *dev,  {
>   struct kfd_mem_obj *mqd_mem_obj = NULL;
>   uint64_t offset;
> + uint32_t size;
>
>   mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
>   if (!mqd_mem_obj)
>   return NULL;
> + /*
> +  * MES writes to areas beyond the MQD size. So allocate
> +  * 1 PAGE_SIZE of memory for the MQD if MES is enabled.
> +  */
> + if (dev->shared_resources.enable_mes)
> + size = PAGE_SIZE;
> + else
> + size = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]-
> >mqd_size;
>
>   offset = (q->sdma_engine_id *
>   dev->device_info.num_sdma_queues_per_engine +
> - q->sdma_queue_id) *
> - dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
> + q->sdma_queue_id) * size;
>
>   offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
>
> --
> 2.25.1



[PATCH] drm/amdkfd: To fix sdma page fault issue for GC 11.x

2023-02-06 Thread Ji, Ruili
From: Ruili Ji 

For the MQD memory, KMD would always allocate 4K memory,
and the MES scheduler would write to the end of the MQD for the unmap flag.

Signed-off-by: Ruili Ji 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  | 12 +--
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c06ada0844ba..d682e6921438 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2244,10 +2244,22 @@ static int allocate_hiq_sdma_mqd(struct 
device_queue_manager *dqm)
int retval;
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
-   uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
-   get_num_all_sdma_engines(dqm) *
-   dev->device_info.num_sdma_queues_per_engine +
-   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   uint32_t size;
+   /*
+* MES writes to areas beyond the MQD size. So allocate
+* 1 PAGE_SIZE of memory for the MQD if MES is enabled.
+*/
+   if (dev->shared_resources.enable_mes) {
+   size = PAGE_SIZE *
+   get_num_all_sdma_engines(dqm) *
+   dev->device_info.num_sdma_queues_per_engine +
+   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   } else {
+   size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
+   get_num_all_sdma_engines(dqm) *
+   dev->device_info.num_sdma_queues_per_engine +
+   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   }
 
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 623ccd227b7d..ea176a515898 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -66,15 +66,23 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
 {
struct kfd_mem_obj *mqd_mem_obj = NULL;
uint64_t offset;
+   uint32_t size;
 
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
+   /*
+* MES writes to areas beyond the MQD size. So allocate
+* 1 PAGE_SIZE of memory for the MQD if MES is enabled.
+*/
+   if (dev->shared_resources.enable_mes)
+   size = PAGE_SIZE;
+   else
+   size = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
 
offset = (q->sdma_engine_id *
dev->device_info.num_sdma_queues_per_engine +
-   q->sdma_queue_id) *
-   dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+   q->sdma_queue_id) * size;
 
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
 
-- 
2.25.1



Re: [PATCH 09/13] drm/amdgpu: accommodate DOMAIN/PL_DOORBELL

2023-02-06 Thread Christian König

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

This patch adds changes to accommodate the new GEM/TTM domain
for doorbell memory.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c | 19 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 24 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  3 ++-
  7 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e3e2e6e3b485..e1c1a360614e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -974,6 +974,7 @@ struct amdgpu_device {
atomic64_t vram_pin_size;
atomic64_t visible_pin_size;
atomic64_t gart_pin_size;
+   atomic64_t doorbell_pin_size;


Please drop that, the amount of pinned doorbells is not needed as far as 
I can see.


  
  	/* soc15 register offset based on ip, instance and  segment */

uint32_t*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
index 0656e5bb4f05..43a3137019b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
@@ -659,15 +659,17 @@ static void amdgpu_bar_mgr_del(struct 
ttm_resource_manager *man,
   * @dev: the other device
   * @dir: dma direction
   * @sgt: resulting sg table
+ * @mem_type: memory type
   *
   * Allocate and fill a sg table from a VRAM allocation.
   */
  int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
- struct ttm_resource *res,
- u64 offset, u64 length,
- struct device *dev,
- enum dma_data_direction dir,
- struct sg_table **sgt)
+struct ttm_resource *res,
+u64 offset, u64 length,
+struct device *dev,
+enum dma_data_direction dir,
+struct sg_table **sgt,
+u32 mem_type)


And again that doesn't make any sense at all.

For now we don't want to export doorbells through DMA-buf.


  {
struct amdgpu_res_cursor cursor;
struct scatterlist *sg;
@@ -701,10 +703,15 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
 */
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
-   phys_addr_t phys = cursor.start + adev->gmc.vram_aper_base;
+   phys_addr_t phys = cursor.start;
size_t size = cursor.size;
dma_addr_t addr;
  
+		if (mem_type == TTM_PL_VRAM)

+   phys += adev->gmc.vram_aper_base;
+   else
+   phys += adev->gmc.doorbell_aper_base;
+
addr = dma_map_resource(dev, phys, size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
r = dma_mapping_error(dev, addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c48ccde281c3..c645bdc49f34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -179,9 +179,10 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
dma_buf_attachment *attach,
break;
  
  	case TTM_PL_VRAM:

+   case AMDGPU_PL_DOORBELL:
r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
 bo->tbo.base.size, attach->dev,
-dir, &sgt);
+dir, &sgt, 
bo->tbo.resource->mem_type);
if (r)
return ERR_PTR(r);
break;


That stuff can be dropped as well as far as I can see.


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 887fc53a7d16..b2cfd46c459b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -147,6 +147,18 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo 
*abo, u32 domain)
c++;
}
  
+	if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {

+   places[c].fpfn = 0;
+   places[c].lpfn = 0;
+   places[c].mem_type = AMDGPU_PL_DOORBELL;
+   places[c].flags = 0;
+   places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+   if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+

Re: [PATCH 08/13] drm/amdgpu: move doorbell ptr into mman structure

2023-02-06 Thread Christian König

On 03.02.23 at 20:08, Shashank Sharma wrote:

From: Alex Deucher 

This patch:
- moves the doorbell.ptr variable to mman structure
- renames it to doorbell_aper_base_kaddr for better readability;

This change is to make doorbell's ttm management similar to vram's.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Yeah, that seems to make sense. Acked-by: Christian König 




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 22 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
  3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 16580d9580d4..cda5387aae50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -597,7 +597,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 
index)
return 0;
  
  	if (index < adev->doorbell.num_doorbells) {

-   return readl(adev->doorbell.ptr + index);
+   return readl(adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -620,7 +620,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 
index, u32 v)
return;
  
  	if (index < adev->doorbell.num_doorbells) {

-   writel(v, adev->doorbell.ptr + index);
+   writel(v, adev->mman.doorbell_aper_base_kaddr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -641,7 +641,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 
index)
return 0;
  
  	if (index < adev->doorbell.num_doorbells) {

-   return atomic64_read((atomic64_t *)(adev->doorbell.ptr + 
index));
+   return atomic64_read((atomic64_t 
*)(adev->mman.doorbell_aper_base_kaddr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
return 0;
@@ -664,7 +664,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 
index, u64 v)
return;
  
  	if (index < adev->doorbell.num_doorbells) {

-   atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
+   atomic64_set((atomic64_t *)(adev->mman.doorbell_aper_base_kaddr 
+ index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
}
@@ -1038,7 +1038,7 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
adev->gmc.doorbell_aper_base = 0;
adev->gmc.doorbell_aper_size = 0;
adev->doorbell.num_doorbells = 0;
-   adev->doorbell.ptr = NULL;
+   adev->mman.doorbell_aper_base_kaddr = NULL;
return 0;
}
  
@@ -1071,10 +1071,10 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 
 		adev->doorbell.num_doorbells += 0x400;
 	}
 
-	adev->doorbell.ptr = ioremap(adev->gmc.doorbell_aper_base,
-				     adev->doorbell.num_doorbells *
-				     sizeof(u32));
-	if (adev->doorbell.ptr == NULL)
+	adev->mman.doorbell_aper_base_kaddr = ioremap(adev->gmc.doorbell_aper_base,
+						      adev->doorbell.num_doorbells *
+						      sizeof(u32));
+	if (adev->mman.doorbell_aper_base_kaddr == NULL)
return -ENOMEM;
  
  	return 0;

@@ -1089,8 +1089,8 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
   */
  static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
  {
-   iounmap(adev->doorbell.ptr);
-   adev->doorbell.ptr = NULL;
+   iounmap(adev->mman.doorbell_aper_base_kaddr);
+   adev->mman.doorbell_aper_base_kaddr = NULL;
  }
  
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h

index c6324970eb79..464be28da4fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -25,7 +25,6 @@
   * GPU doorbell structures, functions & helpers
   */
  struct amdgpu_doorbell {
-   u32 __iomem *ptr;
u32 num_doorbells;  /* Number of doorbells actually 
reserved for amdgpu. */
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index ea53aae3ee0b..243deb1ffc54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -52,6 +52,7 @@ struct amdgpu_mman {
struct ttm_device   bdev;
boolinitialized;
void __iomem*vram_a

Re: [PATCH 07/13] drm/amdgpu: store doorbell info in gmc structure

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch moves doorbell info into adev->gmc structure, to align
with vram info. This will prepare structures for proper ttm management
of the doorbell BAR.


Mhm, this is most likely not a good idea either.

The doorbell isn't managed by the GMC in any way. Those are two 
completely different things in hw if I'm not completely mistaken.


Christian.



Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  8 
  drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c  |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 14 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  3 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  7 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c   |  4 ++--
  13 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 58689b2a2d1c..28076da2258f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -106,13 +106,13 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 * not initialized as AMDGPU manages the whole
 * doorbell space.
 */
-   *aperture_base = adev->doorbell.base;
+   *aperture_base = adev->gmc.doorbell_aper_base;
*aperture_size = 0;
*start_offset = 0;
-   } else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+   } else if (adev->gmc.doorbell_aper_size > adev->doorbell.num_doorbells *
sizeof(u32)) {
-   *aperture_base = adev->doorbell.base;
-   *aperture_size = adev->doorbell.size;
+   *aperture_base = adev->gmc.doorbell_aper_base;
+   *aperture_size = adev->gmc.doorbell_aper_size;
*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
index 3257da5c3a66..0656e5bb4f05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
@@ -209,7 +209,7 @@ static ssize_t amdgpu_mem_info_doorbell_total_show(struct 
device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
  
-	return sysfs_emit(buf, "%llu\n", adev->doorbell.size);

+   return sysfs_emit(buf, "%llu\n", adev->gmc.doorbell_aper_size);
  }
  
  /**

@@ -897,7 +897,7 @@ int amdgpu_bar_mgr_init(struct amdgpu_device *adev, u32 
domain)
size = adev->gmc.real_vram_size;
} else if (domain == AMDGPU_PL_DOORBELL) {
mgr = &adev->mman.doorbell_mgr;
-   size = adev->doorbell.size;
+   size = adev->gmc.doorbell_aper_size;
} else {
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45588b7919fe..16580d9580d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1035,8 +1035,8 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
  
  	/* No doorbell on SI hardware generation */

if (adev->asic_type < CHIP_BONAIRE) {
-   adev->doorbell.base = 0;
-   adev->doorbell.size = 0;
+   adev->gmc.doorbell_aper_base = 0;
+   adev->gmc.doorbell_aper_size = 0;
adev->doorbell.num_doorbells = 0;
adev->doorbell.ptr = NULL;
return 0;
@@ -1048,15 +1048,15 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
amdgpu_asic_init_doorbell_index(adev);
  
  	/* doorbell bar mapping */

-   adev->doorbell.base = pci_resource_start(adev->pdev, 2);
-   adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+   adev->gmc.doorbell_aper_base = pci_resource_start(adev->pdev, 2);
+   adev->gmc.doorbell_aper_size = pci_resource_len(adev->pdev, 2);
  
  	if (adev->enable_mes) {

adev->doorbell.num_doorbells =
-   adev->doorbell.size / sizeof(u32);
+   adev->gmc.doorbell_aper_size / sizeof(u32);
} else {
adev->doorbell.num_doorbells =
-   min_t(u32, adev->doorbell.size /

Re: [PATCH 06/13] drm/amdgpu: rename gmc.aper_base/size

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

This patch renames the aper_base and aper_size parameters (in adev->gmc)
to vram_aper_base and vram_aper_size, to differentiate them from the
doorbell BAR.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  6 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 12 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  8 
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |  6 +++---
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 12 ++--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 10 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 10 +-
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c|  4 ++--
  14 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f99d4873bf22..58689b2a2d1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -438,7 +438,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device 
*adev,
mem_info->vram_width = adev->gmc.vram_width;
  
  	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",

-   &adev->gmc.aper_base,
+   &adev->gmc.vram_aper_base,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c

index 0e0f212bd71c..3257da5c3a66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bar_mgr.c
@@ -701,7 +701,7 @@ int amdgpu_bar_mgr_alloc_sgt(struct amdgpu_device *adev,
 */
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
-   phys_addr_t phys = cursor.start + adev->gmc.aper_base;
+   phys_addr_t phys = cursor.start + adev->gmc.vram_aper_base;
size_t size = cursor.size;
dma_addr_t addr;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 0b6a394e109b..45588b7919fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3961,7 +3961,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
/* Memory manager related */
if (!adev->gmc.xgmi.connected_to_cpu) {
arch_phys_wc_del(adev->gmc.vram_mtrr);
-   arch_io_free_memtype_wc(adev->gmc.aper_base, 
adev->gmc.aper_size);
+   arch_io_free_memtype_wc(adev->gmc.vram_aper_base, 
adev->gmc.vram_aper_size);
}
  }
  
@@ -5562,14 +5562,14 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,

uint64_t address_mask = peer_adev->dev->dma_mask ?
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
-   adev->gmc.aper_base + adev->gmc.aper_size - 1;
+   adev->gmc.vram_aper_base + adev->gmc.vram_aper_size - 1;
bool p2p_access =
!adev->gmc.xgmi.connected_to_cpu &&
!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
  
  	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&

adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
-   !(adev->gmc.aper_base & address_mask ||
+   !(adev->gmc.vram_aper_base & address_mask ||
  aper_limit & address_mask));
  #else
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 02a4c93673ce..c7e64e234de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -775,7 +775,7 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, 
struct amdgpu_bo *bo)
   */
  uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo 
*bo)
  {
-   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.aper_base;
+   return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + 
adev->gmc.vram_aper_base;
  }
  
  int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0305b660cd17..bb7076ecbf01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -167,8 +167,8 @@ struct amd

Re: [PATCH 04/13] drm/amdgpu: replace aper_base_kaddr with vram_aper_base_kaddr

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

To differentiate it from the doorbell BAR.

Signed-off-by: Alex Deucher 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 14 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  2 +-
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +-
  5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f28a8c02f64..0b6a394e109b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -354,12 +354,12 @@ size_t amdgpu_device_aper_access(struct amdgpu_device 
*adev, loff_t pos,
size_t count = 0;
uint64_t last;
  
-	if (!adev->mman.aper_base_kaddr)

+   if (!adev->mman.vram_aper_base_kaddr)
return 0;
  
  	last = min(pos + size, adev->gmc.visible_vram_size);

if (last > pos) {
-   addr = adev->mman.aper_base_kaddr + pos;
+   addr = adev->mman.vram_aper_base_kaddr + pos;
count = last - pos;
  
  		if (write) {

@@ -3954,9 +3954,9 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device 
*adev)
  
  	iounmap(adev->rmmio);

adev->rmmio = NULL;
-   if (adev->mman.aper_base_kaddr)
-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
  
  	/* Memory manager related */

if (!adev->gmc.xgmi.connected_to_cpu) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 668826653591..196ba62ef721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -578,9 +578,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device 
*bdev,
if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
return -EINVAL;
  
-		if (adev->mman.aper_base_kaddr &&

+   if (adev->mman.vram_aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-   mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+   mem->bus.addr = (u8 *)adev->mman.vram_aper_base_kaddr +
mem->bus.offset;
  
  		mem->bus.offset += adev->gmc.aper_base;

@@ -1752,12 +1752,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  #ifdef CONFIG_64BIT
  #ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
-   adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_cache(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
  
  	else

  #endif
-   adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+   adev->mman.vram_aper_base_kaddr = 
ioremap_wc(adev->gmc.aper_base,
adev->gmc.visible_vram_size);
  #endif
  
@@ -1904,9 +1904,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
  
  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
  
-		if (adev->mman.aper_base_kaddr)

-   iounmap(adev->mman.aper_base_kaddr);
-   adev->mman.aper_base_kaddr = NULL;
+   if (adev->mman.vram_aper_base_kaddr)
+   iounmap(adev->mman.vram_aper_base_kaddr);
+   adev->mman.vram_aper_base_kaddr = NULL;
  
  		drm_dev_exit(idx);

}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 1061447befc6..020ebba5a51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -50,7 +50,7 @@ struct amdgpu_gtt_mgr {
  struct amdgpu_mman {
struct ttm_device   bdev;
boolinitialized;
-   void __iomem*aper_base_kaddr;
+   void __iomem*vram_aper_base_kaddr;
  
  	/* buffer handling */

const struct amdgpu_buffer_funcs*buffer_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index bd3e3e23a939..f39d4f593a2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -611,10 +611,10 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
 */
sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
  
-		if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {

-   DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is 
not initialized.\n",
+   if (adev->gmc

Re: [PATCH 02/13] drm/amdgpu: rename vram_mgr functions to bar_mgr

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

Rename the VRAM manager functions so they can be reused to manage the
doorbell BAR as well.


Yeah, as said before Alex did this before we had the buddy allocator.

This doesn't make sense any more and should probably be dropped completely.

Christian.



Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |   4 +-
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h|  14 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  36 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 200 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  18 +-
  12 files changed, 157 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 7b5ce00f0602..e34eae8d64cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -723,7 +723,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct 
amdgpu_device *adev,
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
  
  		if (used_vis_vram < total_vis_vram) {

u64 free_vis_vram = total_vis_vram - used_vis_vram;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 271e30e34d93..c48ccde281c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -179,9 +179,9 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
dma_buf_attachment *attach,
break;
  
  	case TTM_PL_VRAM:

-   r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
- bo->tbo.base.size, attach->dev,
- dir, &sgt);
+   r = amdgpu_bar_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+bo->tbo.base.size, attach->dev,
+dir, &sgt);
if (r)
return ERR_PTR(r);
break;
@@ -215,7 +215,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment 
*attach,
sg_free_table(sgt);
kfree(sgt);
} else {
-   amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+   amdgpu_bar_mgr_free_sgt(attach->dev, dir, sgt);
}
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index cd4caaa29528..9f148ea7ca66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2807,12 +2807,12 @@ static struct pci_error_handlers amdgpu_pci_err_handler 
= {
.resume = amdgpu_pci_resume,
  };
  
-extern const struct attribute_group amdgpu_vram_mgr_attr_group;

+extern const struct attribute_group amdgpu_bar_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  extern const struct attribute_group amdgpu_vbios_version_attr_group;
  
  static const struct attribute_group *amdgpu_sysfs_groups[] = {

-   &amdgpu_vram_mgr_attr_group,
+   &amdgpu_bar_mgr_attr_group,
&amdgpu_gtt_mgr_attr_group,
&amdgpu_vbios_version_attr_group,
NULL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7aa7e52ca784..2ce11434fb22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -650,7 +650,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
-   ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+   ui64 = amdgpu_bar_mgr_vis_usage(&adev->mman.vram_mgr);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_GTT_USAGE:
ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
@@ -704,7 +704,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
atomic64_read(&adev->vi

Re: [PATCH 01/13] drm/amdgpu: add UAPI for allocating doorbell memory

2023-02-06 Thread Christian König

Am 03.02.23 um 20:08 schrieb Shashank Sharma:

From: Alex Deucher 

Signed-off-by: Alex Deucher 
---
  include/uapi/drm/amdgpu_drm.h | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4038abe8505a..fd1c65d78ded 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -94,6 +94,9 @@ extern "C" {
   *
   * %AMDGPU_GEM_DOMAIN_OA  Ordered append, used by 3D or Compute engines
   * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell memory. It is an MMIO region for
+ * signalling user mode queues.


Drop "memory" from the sentence. The key point of doorbells is that they 
are *not* memory :)


Apart from that looks good to me,
Christian.


   */
  #define AMDGPU_GEM_DOMAIN_CPU 0x1
  #define AMDGPU_GEM_DOMAIN_GTT 0x2
@@ -101,12 +104,14 @@ extern "C" {
  #define AMDGPU_GEM_DOMAIN_GDS 0x8
  #define AMDGPU_GEM_DOMAIN_GWS 0x10
  #define AMDGPU_GEM_DOMAIN_OA  0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40
  #define AMDGPU_GEM_DOMAIN_MASK(AMDGPU_GEM_DOMAIN_CPU | \
 AMDGPU_GEM_DOMAIN_GTT | \
 AMDGPU_GEM_DOMAIN_VRAM | \
 AMDGPU_GEM_DOMAIN_GDS | \
 AMDGPU_GEM_DOMAIN_GWS | \
-AMDGPU_GEM_DOMAIN_OA)
+AMDGPU_GEM_DOMAIN_OA | \
+AMDGPU_GEM_DOMAIN_DOORBELL)
  
  /* Flag that CPU access will be required for the case of VRAM domain */

  #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
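
As a side note, a minimal sketch of how userspace could then allocate a
doorbell page through the existing GEM create ioctl (the helper name, the
one-page size choice and the missing error handling are assumptions for
illustration, not part of the patch):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>

/* Allocate one doorbell page as a GEM BO in the new DOORBELL domain. */
static int alloc_doorbell_bo(int drm_fd, uint64_t page_size, uint32_t *handle)
{
	union drm_amdgpu_gem_create args;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = page_size;			/* one doorbell page */
	args.in.alignment = page_size;
	args.in.domains = AMDGPU_GEM_DOMAIN_DOORBELL;	/* the new domain bit */

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
		return -1;

	*handle = args.out.handle;
	return 0;
}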




Re: [PATCH] drm/amdgpu: Fix potential race processing vm->freed

2023-02-06 Thread Christian König

Am 03.02.23 um 19:10 schrieb Rob Clark:

From: Rob Clark 

If userspace calls the AMDGPU_CS ioctl from multiple threads, because
the vm is global to the drm_file, you can end up with multiple threads
racing in amdgpu_vm_clear_freed().  So the freed list should be
protected with the status_lock, similar to other vm lists.


Well this is nonsense. To process the freed list the VM root PD lock 
must be held anyway.


If we have a call path where this isn't true then we have a major bug at 
a different place here.
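
One way to make that expectation explicit would be a lockdep assertion on the
root PD reservation, roughly like the sketch below (the helper name is made up
and the member path follows the current amdgpu sources as far as I can see):

static void amdgpu_vm_assert_root_held(struct amdgpu_vm *vm)
{
	/* Processing vm->freed is only safe while the root PD reservation
	 * is held; asserting it turns any racy call path into an immediate
	 * lockdep warning instead of silent list corruption. */
	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}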


Regards,
Christian.



Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)")
Signed-off-by: Rob Clark 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++
  1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b9441ab457ea..aeed7bc1512f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1240,10 +1240,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+   struct list_head freed;
int r;
  
-	while (!list_empty(&vm->freed)) {

-   mapping = list_first_entry(&vm->freed,
+   /*
+* Move the contents of the VM's freed list to a local list
+* that we can iterate without racing against other threads:
+*/
+   spin_lock(&vm->status_lock);
+   list_replace_init(&vm->freed, &freed);
+   spin_unlock(&vm->status_lock);
+
+   while (!list_empty(&freed)) {
+   mapping = list_first_entry(&freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
  
@@ -1258,6 +1267,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,

amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
+
+   /*
+* Move any unprocessed mappings back to the freed
+* list:
+*/
+   spin_lock(&vm->status_lock);
+   list_splice_tail(&freed, &vm->freed);
+   spin_unlock(&vm->status_lock);
+
return r;
}
}
@@ -1583,11 +1601,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
mapping->bo_va = NULL;
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
  
-	if (valid)

+   if (valid) {
+   spin_lock(&vm->status_lock);
list_add(&mapping->list, &vm->freed);
-   else
+   spin_unlock(&vm->status_lock);
+   } else {
amdgpu_vm_free_mapping(adev, vm, mapping,
   bo_va->last_pt_update);
+   }
  
  	return 0;

  }
@@ -1671,7 +1692,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
tmp->last = eaddr;
  
  		tmp->bo_va = NULL;

+   spin_lock(&vm->status_lock);
list_add(&tmp->list, &vm->freed);
+   spin_unlock(&vm->status_lock);
trace_amdgpu_vm_bo_unmap(NULL, tmp);
}
  
@@ -1788,7 +1811,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,

amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+   spin_lock(&vm->status_lock);
list_add(&mapping->list, &vm->freed);
+   spin_unlock(&vm->status_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);




Re: [PATCH 3/3] drm/connector: Deprecate split for BT.2020 in drm_colorspace enum

2023-02-06 Thread Ville Syrjälä
On Sat, Feb 04, 2023 at 06:09:45AM +, Joshua Ashton wrote:
> 
> 
> On 2/3/23 19:34, Ville Syrjälä wrote:
> > On Fri, Feb 03, 2023 at 09:25:38PM +0200, Ville Syrjälä wrote:
> >> On Fri, Feb 03, 2023 at 08:56:55PM +0200, Ville Syrjälä wrote:
> >>> On Fri, Feb 03, 2023 at 01:28:20PM -0500, Harry Wentland wrote:
> 
> 
>  On 2/3/23 11:00, Ville Syrjälä wrote:
> > On Fri, Feb 03, 2023 at 10:24:52AM -0500, Harry Wentland wrote:
> >>
> >>
> >> On 2/3/23 10:19, Ville Syrjälä wrote:
> >>> On Fri, Feb 03, 2023 at 09:39:42AM -0500, Harry Wentland wrote:
> 
> 
>  On 2/3/23 07:59, Sebastian Wick wrote:
> > On Fri, Feb 3, 2023 at 11:40 AM Ville Syrjälä
> >  wrote:
> >>
> >> On Fri, Feb 03, 2023 at 02:07:44AM +, Joshua Ashton wrote:
> >>> Userspace has no way of controlling or knowing the pixel encoding
> >>> currently, so there is no way for it to ever get the right values 
> >>> here.
> >>
> >> That applies to a lot of the other values as well (they are
> >> explicitly RGB or YCC). The idea was that this property sets the
> >> infoframe/MSA/SDP value exactly, and other properties should be
> >> added for userspace to control the pixel encoding/colorspace
> >> conversion (if desired, or userspace just makes sure to
> >> directly feed in the correct kind of data).
> >
> > I'm all for getting userspace control over pixel encoding but even
> > then the kernel always knows which pixel encoding is selected and
> > which InfoFrame has to be sent. Is there a reason why userspace 
> > would
> > want to control the variant explicitly to the wrong value?
> >
> 
>  I've asked this before but haven't seen an answer: Is there an 
>  existing
>  upstream userspace project that makes use of this property (other 
>  than
>  what Joshua is working on in gamescope right now)? That would help us
>  understand the intent better.
> >>>
> >>> The intent was to control the infoframe colorimetry bits,
> >>> nothing more. No idea what real userspace there was, if any.
> 
> Controlling the infoframe alone isn't useful at all unless you can 
> guarantee the wire encoding, which we cannot do.
> 
> >>>
> 
>  I don't think giving userspace explicit control over the exact 
>  infoframe
>  values is the right thing to do.
> 
> +1
> 
> >>>
> >>> Only userspace knows what kind of data it's stuffing into
> >>> the pixels (and/or how it configures the csc units/etc.) to
> >>> generate them.
> >>>
> >>
> >> Yes, but userspace doesn't control or know whether we drive
> >> RGB or YCbCr on the wire. In fact, in some cases our driver
> >> needs to fallback to YCbCr420 for bandwidth reasons. There
> >> is currently no way for userspace to know that and I don't
> >> think it makes sense.
> >
> > People want that control as well for whatever reason. We've
> > been asked to allow YCbCr 4:4:4 output many times.
> >
> > The automagic 4:2:0 fallback I think is rather fundamentally
> > incompatible with fancy color management. How would we even
> > know whether to use eg. BT.2020 vs. BT.709 matrix? In i915
> > that stuff is just always BT.709 limited range, no questions
> > asked.
> 
> That's what the Colorspace property *should* be determining here.
> That's what we have it set up to do in SteamOS/my tree right now.
> 
> >
> 
>  We use what we're telling the display, i.e., the value in the
>  colorspace property. That way we know whether to use a BT.2020
>  or BT.709 matrix.
> >>>
> >>> And given how these things have gone in the past I think
> >>> that is likely to bite someone in the future. Also not
> >>> what this property was meant to do nor does on any other
> >>> driver AFAIK.
> >>>
>  I don't see how it's fundamentally incompatible with fancy
>  color management stuff.
> 
>  If we start forbidding drivers from falling back to YCbCr
>  (whether 4:4:4 or 4:2:0) we will break existing behavior on
>  amdgpu and will see bug reports.
> >>>
> >>> The compositors could deal with that if/when they start doing
> >>> the full color management stuff. The current stuff only really
> >>> works when the kernel is allowed to do whatever it wants.
> >>>
> 
> > So I think if userspace wants real color management it's
> > going to have to set up the whole pipeline. And for that
> > we need at least one new property to control the RGB->YCbCr
> > conversion (or to explicitly avoid it).
> 
> I mentioned this in my commit description, we absolutely should offer 
> fine control here eventually.
> 
> I don't think we need to solve that problem here though.
> 
> >
> > And given that the proposed patch 
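
For reference, a compositor selecting BT.2020 colorimetry through the
Colorspace property is just an atomic property write, roughly like this sketch
(the property id lookup and the enum value are assumed to have been resolved
beforehand via the usual drmModeObjectGetProperties()/drmModeGetProperty()
dance):

#include <stdint.h>
#include <xf86drmMode.h>

/* Ask for BT.2020 colorimetry on a connector by writing its "Colorspace"
 * property in an atomic commit. */
static int set_colorspace(drmModeAtomicReq *req, uint32_t connector_id,
			  uint32_t colorspace_prop_id, uint64_t enum_value)
{
	return drmModeAtomicAddProperty(req, connector_id,
					colorspace_prop_id, enum_value) < 0 ? -1 : 0;
}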

Re: [PATCH] drm/amdgpu: expose more memory stats in fdinfo

2023-02-06 Thread Christian König

Just two nit picks:

+    seq_printf(m, "drm-evicted-visible-vram:\t%llu KiB\n",
+           stats.evicted_visible_vram/1024UL);

For the values not standardized across all DRM drivers we might want to use
"amd" as the prefix here instead of "drm".


+    uint64_t requested_gtt;        /* how much userspace asked for */

We used to have automated checkers complaining about comments after members.

Kerneldoc compliant comments look like this:

    /* @timestamp replaced by @rcu on dma_fence_release() */
    struct rcu_head rcu;
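
Applied to this patch, the two nit picks would then look roughly like this
(the struct name and its members are taken from the snippets quoted above,
so treat it as a sketch rather than the final form):

	/* driver-specific keys get an "amd-" prefix instead of "drm-" */
	seq_printf(m, "amd-evicted-visible-vram:\t%llu KiB\n",
		   stats.evicted_visible_vram / 1024UL);

and for the member comment:

	struct amdgpu_mem_stats {
		/** @requested_gtt: how much GTT userspace asked for */
		uint64_t requested_gtt;
	};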

Apart from that looks good to me.

Regards,
Christian.

Am 30.01.23 um 07:56 schrieb Marek Olšák:

Hi,

This will be used for performance investigations. The patch is attached.

Thanks,
Marek




Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is unavailable

2023-02-06 Thread Christian König

Am 06.02.23 um 09:28 schrieb Xiao, Jack:


[AMD Official Use Only - General]

>> >> It's simply not allowed to free up resources during suspend since
>> >> those can't be acquired again during resume.

>> The in_suspend flag is set at the beginning of suspend and unset at the
>> end of resume. It can't filter the case you mentioned.



   Why not? This is exactly what it should do.

[Jack] If resources are freed during resume, that should not hit the issue
you described. But checking only the in_suspend flag would treat those
cases as warnings too.




No, once more: Freeing up or allocating resources between suspend and 
resume is illegal!


If you free up a resource during resume you should absolutely hit that, 
this is intentional!


Regards,
Christian.


Regards,

Jack

*From:* Koenig, Christian 
*Sent:* Monday, February 6, 2023 4:06 PM
*To:* Xiao, Jack ; Christian König 
; amd-gfx@lists.freedesktop.org; 
Deucher, Alexander 
*Subject:* Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA 
is unavailable


Am 06.02.23 um 08:23 schrieb Xiao, Jack:

[AMD Official Use Only - General]

>> Nope, that is not related to any hw state.

can use other flag.

>> It's simply not allowed to free up resources during suspend
since those can't be acquired again during resume.

The in_suspend flag is set at the beginning of suspend and unset
at the end of resume. It can’t filter the case you mentioned.


Why not? This is exactly what it should do.

Do you know the root cause of these cases hitting the issue? So
that we can get an exact point to warn the freeing up behavior.


Well the root cause is programming errors. See, between suspending and 
resuming you should not allocate nor free memory.


Otherwise we can run into trouble. And this check here is one part of 
that, we should probably add another warning during allocation of 
memory. But this here is certainly correct.
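
A rough sketch of what that additional allocation-time warning could look
like (the helper name and the exact call site are assumptions, only the
idea comes from this thread):

static inline void amdgpu_bo_warn_alloc_in_suspend(struct amdgpu_device *adev)
{
	/* Allocating memory between suspend and resume is just as illegal
	 * as freeing it, so flag it the same way the free path does. */
	WARN_ON(adev->in_suspend);
}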


Regards,
Christian.

Thanks,

Jack

*From:* Christian König 

*Sent:* Friday, February 3, 2023 9:20 PM
*To:* Xiao, Jack  ;
Koenig, Christian 
; amd-gfx@lists.freedesktop.org;
Deucher, Alexander 

*Subject:* Re: [PATCH] drm/amdgpu: only WARN freeing buffers when
DMA is unavailable

Nope, that is not related to any hw state.

It's simply not allowed to free up resources during suspend since
those can't be acquired again during resume.

We had a couple of cases now where this was wrong. If you get a
warning from that please fix the code which tried to free
something during suspend instead.

Regards,
Christian.

Am 03.02.23 um 07:04 schrieb Xiao, Jack:

[AMD Official Use Only - General]

>> It's simply illegal to free up memory during suspend.

Why? In my understanding, the limit was caused by DMA shutdown.

Regards,

Jack

*From:* Koenig, Christian 

*Sent:* Thursday, February 2, 2023 7:43 PM
*To:* Xiao, Jack 
; amd-gfx@lists.freedesktop.org;
Deucher, Alexander 

*Subject:* AW: [PATCH] drm/amdgpu: only WARN freeing buffers
when DMA is unavailable

Big NAK to this! This warning is not related in any way to the
hw state.

It's simply illegal to free up memory during suspend.

Regards,

Christian.



*Von:*Xiao, Jack 
*Gesendet:* Donnerstag, 2. Februar 2023 10:54
*An:* amd-gfx@lists.freedesktop.org
; Deucher, Alexander
; Koenig, Christian

*Cc:* Xiao, Jack 
*Betreff:* [PATCH] drm/amdgpu: only WARN freeing buffers when
DMA is unavailable

Reduce warnings: only warn when DMA is unavailable.

Signed-off-by: Jack Xiao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2d237f3d3a2e..e3e3764ea697 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -422,7 +422,8 @@ void amdgpu_bo_free_kernel(struct
amdgpu_bo **bo, u64 *gpu_addr,
 if (*bo == NULL)
 return;

- WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend &&
+

!amdgpu_ttm_adev((*bo)->tbo.bdev)->ip_blocks[AMD_IP_BLOCK_TYPE_SDMA].status.hw);

 if (likely(amdgpu_bo_r

Re: [PATCH 0/8] AMDGPU usermode queues

2023-02-06 Thread Christian König

Am 06.02.23 um 01:52 schrieb Dave Airlie:

On Sat, 4 Feb 2023 at 07:54, Shashank Sharma  wrote:

From: Shashank Sharma 

This patch series introduces AMDGPU usermode graphics queues.
User queues is a method of GPU workload submission into the graphics
hardware without any interaction with kernel/DRM schedulers. In this
method, a userspace graphics application can create its own workqueue
and submit it directly in the GPU HW.

The general idea of how this is supposed to work:
- The application creates the following GPU objects:
   - A queue object to hold the workload packets.
   - A read pointer object.
   - A write pointer object.
   - A doorbell page.
- Kernel picks any 32-bit offset in the doorbell page for this queue.
- The application uses the usermode_queue_create IOCTL introduced in
   this patch, by passing the GPU addresses of these objects (read
   ptr, write ptr, queue base address and doorbell address)
- The kernel creates the queue and maps it in the HW.
- The application can start submitting the data in the queue as soon as
   the kernel IOCTL returns.
- Once the data is filled in the queue, the app must write the number of
   dwords in the doorbell offset, and the GPU will start fetching the data.
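
In code, the submission side of that flow would look roughly like the sketch
below (ring size, pointer types and the exact meaning of the doorbell write
are simplified assumptions; only the order of steps comes from the list above,
and a real implementation would also need memory barriers):

#include <stdint.h>

#define RING_SIZE_DW 1024	/* assumed ring size, in dwords, for this sketch */

static void submit_packets(volatile uint32_t *ring, volatile uint64_t *wptr,
			   volatile uint32_t *doorbell,
			   const uint32_t *pkt, uint32_t ndw)
{
	uint64_t w = *wptr;
	uint32_t i;

	/* 1. Copy the workload packets into the user-owned queue object. */
	for (i = 0; i < ndw; i++)
		ring[(w + i) % RING_SIZE_DW] = pkt[i];

	/* 2. Publish the new write pointer (in dwords) in the wptr object. */
	*wptr = w + ndw;

	/* 3. Ring the doorbell; from this point the GPU fetches the data. */
	*doorbell = (uint32_t)(w + ndw);
}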

So I just have one question about forward progress here, let's call it
the 51% of VRAM problem.

You have two apps; they both have working sets that allocate > 51% of VRAM.


Marek and I have been working on this quite extensively.


Application (a) has the VRAM and mapping for the user queues and is
submitting work
Application (b) wants to submit work; it has no queue mapping as it
was previously evicted. Does (b) have to call an ioctl to get its
mapping back?


Long story short: No, but that's a bit more complicated to explain.


When (b) calls the ioctl, (a) loses its mapping. Control returns to (b),
but before it submits any work on the ring mapping it has, (a) gets
control and notices it has no queues, so it calls the ioctl, and (b)
loses its mapping, and around and around they go, never making forward
progress.

What's the exit strategy for something like that? Fall back to kernel
submit so you can get memory objects validated and submit some work?


First of all the fw makes sure that processes can only be evicted after 
they have used up their time slice. So when you have two processes fighting 
over a shared resource (memory, locks or whatever) they will always run 
until the end of their time slice before they are pushed away from the hw.


Then when a process is evicted we take a look at what the process has 
already scheduled as work on the hw. If the process isn't idle we start 
a delayed work item to get it going again (similar to what the KFD is 
doing at the moment). When the process is idle we unmap the doorbell 
page(s) from the CPU and wait for the page fault which signals that the 
process wants to submit something again.
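
As a rough sketch of that path (function and member names here are made up
for illustration; only the behaviour follows the description above):

static void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
			       struct amdgpu_usermode_queue *queue)
{
	if (!amdgpu_userq_is_idle(queue)) {
		/* Work is still queued on the hw: retry later, similar to
		 * what the KFD does today. */
		schedule_delayed_work(&uq_mgr->resume_work, HZ);
		return;
	}

	/* Idle: drop the CPU mapping of the doorbell page(s). The next
	 * write from userspace faults, and that page fault is the signal
	 * that the process wants to submit something again. */
	unmap_mapping_range(queue->doorbell_mapping, 0, PAGE_SIZE, 1);
}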


And the last component is a static resource management which distributes 
the available resources equally between the different active processes 
fighting over them. Activity of a process is determined by the periodic 
interrupts sent by the hw for running processes.


I call the memory management algorithm based on this Robin Hood 
(https://drive.google.com/file/d/1vIrX37c3B2IgWFtZ2UpeKxh0-YMlV6NU/view) 
and simulated that a bit in some spread sheets, but it isn't fully 
implemented yet. I'm working on this for a couple of years now and 
slowly pushing DRM/TTM into the direction we need for this to work.


Christian.



Dave.




Re: [PATCH] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes

2023-02-06 Thread Christian König




Am 02.02.23 um 17:21 schrieb Friedrich Vock:

The pid field corresponds to the result of gettid() in userspace.
However, userspace cannot reliably attribute PTE events to processes
with just the thread id. This patch allows userspace to easily
attribute PTE update events to specific processes by comparing this
field with the result of getpid().

For attributing events to specific threads, the thread id is also
contained in the common fields of each trace event.

Signed-off-by: Friedrich Vock 


Ah, yes that makes more sense. Reviewed-by: Christian König 



Alex do you pick this up or should I take care of it?

Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..01e42bdd8e4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params 
*params,
trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
min(nptes, 32u), dst, incr,
upd_flags,
-   vm->task_info.pid,
+   vm->task_info.tgid,

vm->immediate.fence_context);
amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
   cursor.level, pe_start, dst,
--
2.39.1





RE: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is unavailable

2023-02-06 Thread Xiao, Jack
[AMD Official Use Only - General]

>> >> It's simply not allowed to free up resources during suspend since
>> >> those can't be acquired again during resume.
>> The in_suspend flag is set at the beginning of suspend and unset at the
>> end of resume. It can't filter the case you mentioned.

   Why not? This is exactly what it should do.

[Jack] If resources are freed during resume, that should not hit the issue you 
described. But checking only the in_suspend flag would treat those cases as warnings too.

Regards,
Jack

From: Koenig, Christian 
Sent: Monday, February 6, 2023 4:06 PM
To: Xiao, Jack ; Christian König 
; amd-gfx@lists.freedesktop.org; Deucher, 
Alexander 
Subject: Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is 
unavailable

Am 06.02.23 um 08:23 schrieb Xiao, Jack:

[AMD Official Use Only - General]

>> Nope, that is not related to any hw state.

can use other flag.

>> It's simply not allowed to free up resources during suspend since those 
>> can't be acquired again during resume.
The in_suspend flag is set at the beginning of suspend and unset at the end of 
resume. It can't filter the case you mentioned.

Why not? This is exactly what it should do.

Do you know the root cause of these cases hitting the issue? So that we can get 
an exact point to warn the freeing up behavior.

Well the root cause is programming errors. See, between suspending and resuming 
you should not allocate nor free memory.

Otherwise we can run into trouble. And this check here is one part of that, we 
should probably add another warning during allocation of memory. But this here 
is certainly correct.

Regards,
Christian.


Thanks,
Jack

From: Christian König 

Sent: Friday, February 3, 2023 9:20 PM
To: Xiao, Jack ; Koenig, Christian 
; 
amd-gfx@lists.freedesktop.org; Deucher, 
Alexander 
Subject: Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is 
unavailable

Nope, that is not related to any hw state.

It's simply not allowed to free up resources during suspend since those can't 
be acquired again during resume.

We had a couple of cases now where this was wrong. If you get a warning from 
that please fix the code which tried to free something during suspend instead.

Regards,
Christian.
Am 03.02.23 um 07:04 schrieb Xiao, Jack:

[AMD Official Use Only - General]

>> It's simply illegal to free up memory during suspend.
Why? In my understanding, the limit was caused by DMA shutdown.

Regards,
Jack

From: Koenig, Christian 

Sent: Thursday, February 2, 2023 7:43 PM
To: Xiao, Jack ; 
amd-gfx@lists.freedesktop.org; Deucher, 
Alexander 
Subject: AW: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is 
unavailable

Big NAK to this! This warning is not related in any way to the hw state.

It's simply illegal to free up memory during suspend.

Regards,
Christian.


Von: Xiao, Jack <jack.x...@amd.com>
Gesendet: Donnerstag, 2. Februar 2023 10:54
An: amd-gfx@lists.freedesktop.org; Deucher, Alexander <alexander.deuc...@amd.com>; Koenig, Christian <christian.koe...@amd.com>
Cc: Xiao, Jack <jack.x...@amd.com>
Betreff: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is unavailable

Reduce warnings: only warn when DMA is unavailable.

Signed-off-by: Jack Xiao <jack.x...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2d237f3d3a2e..e3e3764ea697 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -422,7 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 
*gpu_addr,
 if (*bo == NULL)
 return;

-   WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+   WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend &&
+   
!amdgpu_ttm_adev((*bo)->tbo.bdev)->ip_blocks[AMD_IP_BLOCK_TYPE_SDMA].status.hw);

 if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
 if (cpu_addr)
--
2.37.3




RE: [PATCH] drm/amd/display: Disable migration to ensure consistency of per-CPU variable

2023-02-06 Thread Yin, Tianci (Rico)
[AMD Official Use Only - General]

Looping in Charlie.

Thanks,
Rico

Sent from Mail for Windows

From: Tianci Yin
Sent: Monday, February 6, 2023 3:59 PM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry; Siqueira, 
Rodrigo; Pillai, 
Aurabindo; Yin, Tianci 
(Rico)
Subject: [PATCH] drm/amd/display: Disable migration to ensure consistency of 
per-CPU variable

From: tiancyin 

[why]
Since the variable fpu_recursion_depth is a per-CPU type, it has one copy
on each CPU; thread migration causes a data consistency issue, and then the
call trace shows up. And preemption disabling can't prevent migration.

[how]
Disable migration to ensure consistency of fpu_recursion_depth.

Signed-off-by: tiancyin 
---
 amdgpu_dm/dc_fpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/amdgpu_dm/dc_fpu.c b/amdgpu_dm/dc_fpu.c
index 1743ca0a36..c42aa947c9 100644
--- a/amdgpu_dm/dc_fpu.c
+++ b/amdgpu_dm/dc_fpu.c
@@ -89,6 +89,7 @@ void dc_fpu_begin(const char *function_name, const int line)

 if (*pcpu == 1) {
 #if defined(CONFIG_X86)
+   migrate_disable();
 kernel_fpu_begin();
 #elif defined(CONFIG_PPC64)
 if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
@@ -129,6 +130,7 @@ void dc_fpu_end(const char *function_name, const int line)
 if (*pcpu <= 0) {
 #if defined(CONFIG_X86)
 kernel_fpu_end();
+   migrate_enable();
 #elif defined(CONFIG_PPC64)
 if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
 disable_kernel_vsx();
--
2.34.1



Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA is unavailable

2023-02-06 Thread Christian König

Am 06.02.23 um 08:23 schrieb Xiao, Jack:


[AMD Official Use Only - General]

>> Nope, that is not related to any hw state.

can use other flag.

>> It's simply not allowed to free up resources during suspend since 
those can't be acquired again during resume.


The in_suspend flag is set at the beginning of suspend and unset at 
the end of resume. It can’t filter the case you mentioned.




Why not? This is exactly what it should do.

Do you know the root cause of these cases hitting the issue? So that 
we can get an exact point to warn the freeing up behavior.




Well the root cause is programming errors. See, between suspending and 
resuming you should not allocate nor free memory.


Otherwise we can run into trouble. And this check here is one part of 
that, we should probably add another warning during allocation of 
memory. But this here is certainly correct.


Regards,
Christian.


Thanks,

Jack

*From:* Christian König 
*Sent:* Friday, February 3, 2023 9:20 PM
*To:* Xiao, Jack ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; Deucher, 
Alexander 
*Subject:* Re: [PATCH] drm/amdgpu: only WARN freeing buffers when DMA 
is unavailable


Nope, that is not related to any hw state.

It's simply not allowed to free up resources during suspend since 
those can't be acquired again during resume.


We had a couple of cases now where this was wrong. If you get a 
warning from that please fix the code which tried to free something 
during suspend instead.


Regards,
Christian.

Am 03.02.23 um 07:04 schrieb Xiao, Jack:

[AMD Official Use Only - General]

>> It's simply illegal to free up memory during suspend.

Why? In my understanding, the limit was caused by DMA shutdown.

Regards,

Jack

*From:* Koenig, Christian 

*Sent:* Thursday, February 2, 2023 7:43 PM
*To:* Xiao, Jack  ;
amd-gfx@lists.freedesktop.org; Deucher, Alexander
 
*Subject:* AW: [PATCH] drm/amdgpu: only WARN freeing buffers when
DMA is unavailable

Big NAK to this! This warning is not related in any way to the hw
state.

It's simply illegal to free up memory during suspend.

Regards,

Christian.



*Von:*Xiao, Jack 
*Gesendet:* Donnerstag, 2. Februar 2023 10:54
*An:* amd-gfx@lists.freedesktop.org
; Deucher, Alexander
; Koenig, Christian

*Cc:* Xiao, Jack 
*Betreff:* [PATCH] drm/amdgpu: only WARN freeing buffers when DMA
is unavailable

Reduce warnings: only warn when DMA is unavailable.

Signed-off-by: Jack Xiao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2d237f3d3a2e..e3e3764ea697 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -422,7 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo
**bo, u64 *gpu_addr,
 if (*bo == NULL)
 return;

- WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend &&
+

!amdgpu_ttm_adev((*bo)->tbo.bdev)->ip_blocks[AMD_IP_BLOCK_TYPE_SDMA].status.hw);

 if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
 if (cpu_addr)
-- 
2.37.3