Re: [PATCH 1/2] drm/amdgpu/gfx9: allocate queues horizontally across pipes

2017-06-06 Thread Christian König
Feel free to add an Acked-by: Christian König  
on both.


Christian.

Am 06.06.2017 um 13:43 schrieb Tom St Denis:
First is Reviewed-by: Tom St Denis  and second is 
Acked-by.


Cheers,
Tom

On 05/06/17 11:06 AM, Alex Deucher wrote:

Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

gfx9 was missed when these patches were rebased.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 
+++---

  1 file changed, 52 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 9502353..0c48f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1483,9 +1483,44 @@ static int gfx_v9_0_ngg_en(struct 
amdgpu_device *adev)

  return 0;
  }
  +static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, 
int ring_id,

+  int mec, int pipe, int queue)
+{
+int r;
+unsigned irq_type;
+struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ring = &adev->gfx.compute_ring[ring_id];
+
+/* mec0 is me1 */
+ring->me = mec + 1;
+ring->pipe = pipe;
+ring->queue = queue;
+
+ring->ring_obj = NULL;
+ring->use_doorbell = true;
+ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
++ (ring_id * GFX9_MEC_HPD_SIZE);
+sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);

+
+irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
++ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
++ ring->pipe;
+
+/* type-2 packets are deprecated on MEC, use type-3 instead */
+r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type);
+if (r)
+return r;
+
+
+return 0;
+}
+
  static int gfx_v9_0_sw_init(void *handle)
  {
-int i, r, ring_id;
+int i, j, k, r, ring_id;
  struct amdgpu_ring *ring;
  struct amdgpu_kiq *kiq;
  struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1547,69 +1582,23 @@ static int gfx_v9_0_sw_init(void *handle)
  return r;
  }
  -/* set up the compute queues */
-for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-unsigned irq_type;
-
-if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-continue;
-
-if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-break;
-
-ring = &adev->gfx.compute_ring[ring_id];
-
-/* mec0 is me1 */
-ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-/ adev->gfx.mec.num_pipe_per_mec)
-+ 1;
-ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-% adev->gfx.mec.num_pipe_per_mec;
-ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-ring->ring_obj = NULL;
-ring->use_doorbell = true;
-ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + 
(ring_id * GFX9_MEC_HPD_SIZE);

-ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);

-
-irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-+ ring->pipe;
-
-/* type-2 packets are deprecated on MEC, use type-3 instead */
-r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
- irq_type);
-if (r)
-return r;
-
-ring_id++;
-}
-
-/* set up the compute queues */
-for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-unsigned irq_type;
-
-/* max 32 queues per MEC */
-if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-DRM_ERROR("Too many (%d) compute rings!\n", i);
-break;
+/* set up the compute queues - allocate horizontally across 
pipes */

+ring_id = 0;
+for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+continue;
+
+r = gfx_v9_0_compute_ring_init(adev,
+   ring_id,
+   i, k, j);
+if (r)
+return r;
+
+ring_id++;
+}
  }
-ring = >gfx.compute_ring[i];
-ring->ring_obj = NULL;
-ring->use_doorbell = true;
-ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) 

Re: [PATCH 1/2] drm/amdgpu/gfx9: allocate queues horizontally across pipes

2017-06-06 Thread Tom St Denis
First is Reviewed-by: Tom St Denis  and second is 
Acked-by.


Cheers,
Tom

On 05/06/17 11:06 AM, Alex Deucher wrote:

Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

gfx9 was missed when these patches were rebased.

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 +++---
  1 file changed, 52 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9502353..0c48f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1483,9 +1483,44 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
return 0;
  }
  
+static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,

+ int mec, int pipe, int queue)
+{
+   int r;
+   unsigned irq_type;
+   struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+   ring = &adev->gfx.compute_ring[ring_id];
+
+   /* mec0 is me1 */
+   ring->me = mec + 1;
+   ring->pipe = pipe;
+   ring->queue = queue;
+
+   ring->ring_obj = NULL;
+   ring->use_doorbell = true;
+   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+   + (ring_id * GFX9_MEC_HPD_SIZE);
+   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+   + ring->pipe;
+
+   /* type-2 packets are deprecated on MEC, use type-3 instead */
+   r = amdgpu_ring_init(adev, ring, 1024,
+&adev->gfx.eop_irq, irq_type);
+   if (r)
+   return r;
+
+
+   return 0;
+}
+
  static int gfx_v9_0_sw_init(void *handle)
  {
-   int i, r, ring_id;
+   int i, j, k, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1547,69 +1582,23 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
  
-	/* set up the compute queues */

-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-   unsigned irq_type;
-
-   if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-   continue;
-
-   if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-   break;
-
-   ring = &adev->gfx.compute_ring[ring_id];
-
-   /* mec0 is me1 */
-   ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-   / adev->gfx.mec.num_pipe_per_mec)
-   + 1;
-   ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-   % adev->gfx.mec.num_pipe_per_mec;
-   ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-   ring->ring_obj = NULL;
-   ring->use_doorbell = true;
-   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id 
* GFX9_MEC_HPD_SIZE);
-   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);
-
-   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-   + ring->pipe;
-
-   /* type-2 packets are deprecated on MEC, use type-3 instead */
-   r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-irq_type);
-   if (r)
-   return r;
-
-   ring_id++;
-   }
-
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-   unsigned irq_type;
-
-   /* max 32 queues per MEC */
-   if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-   DRM_ERROR("Too many (%d) compute rings!\n", i);
-   break;
+   /* set up the compute queues - allocate horizontally across pipes */
+   ring_id = 0;
+   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+   if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+   continue;
+
+   r = gfx_v9_0_compute_ring_init(adev,
+

[PATCH 1/2] drm/amdgpu/gfx9: allocate queues horizontally across pipes

2017-06-05 Thread Alex Deucher
Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

gfx9 was missed when these patches were rebased.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 +++---
 1 file changed, 52 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9502353..0c48f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1483,9 +1483,44 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
return 0;
 }
 
+static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+   int r;
+   unsigned irq_type;
+   struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+   ring = &adev->gfx.compute_ring[ring_id];
+
+   /* mec0 is me1 */
+   ring->me = mec + 1;
+   ring->pipe = pipe;
+   ring->queue = queue;
+
+   ring->ring_obj = NULL;
+   ring->use_doorbell = true;
+   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+   + (ring_id * GFX9_MEC_HPD_SIZE);
+   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+   + ring->pipe;
+
+   /* type-2 packets are deprecated on MEC, use type-3 instead */
+   r = amdgpu_ring_init(adev, ring, 1024,
+&adev->gfx.eop_irq, irq_type);
+   if (r)
+   return r;
+
+
+   return 0;
+}
+
 static int gfx_v9_0_sw_init(void *handle)
 {
-   int i, r, ring_id;
+   int i, j, k, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1547,69 +1582,23 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
 
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-   unsigned irq_type;
-
-   if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-   continue;
-
-   if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-   break;
-
-   ring = &adev->gfx.compute_ring[ring_id];
-
-   /* mec0 is me1 */
-   ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-   / adev->gfx.mec.num_pipe_per_mec)
-   + 1;
-   ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-   % adev->gfx.mec.num_pipe_per_mec;
-   ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-   ring->ring_obj = NULL;
-   ring->use_doorbell = true;
-   ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id 
* GFX9_MEC_HPD_SIZE);
-   ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-   sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, 
ring->queue);
-
-   irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-   + ring->pipe;
-
-   /* type-2 packets are deprecated on MEC, use type-3 instead */
-   r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-irq_type);
-   if (r)
-   return r;
-
-   ring_id++;
-   }
-
-   /* set up the compute queues */
-   for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-   unsigned irq_type;
-
-   /* max 32 queues per MEC */
-   if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-   DRM_ERROR("Too many (%d) compute rings!\n", i);
-   break;
+   /* set up the compute queues - allocate horizontally across pipes */
+   ring_id = 0;
+   for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+   for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+   for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+   if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+   continue;
+
+   r = gfx_v9_0_compute_ring_init(adev,
+  ring_id,
+  i, k, j);
+