[PATCH 07/11] drm/radeon: use one VMID for each ring

2014-11-19 Thread Christian König
From: Christian König 

Use multiple VMIDs for each VM, one for each ring. That allows
us to execute flushes separately on each ring. It is still not ideal,
because in a lot of cases rings can share IDs.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/cik.c   |  4 +--
 drivers/gpu/drm/radeon/cik_sdma.c  |  2 +-
 drivers/gpu/drm/radeon/ni.c|  6 ++--
 drivers/gpu/drm/radeon/ni_dma.c|  3 +-
 drivers/gpu/drm/radeon/radeon.h| 36 +--
 drivers/gpu/drm/radeon/radeon_vm.c | 59 +++---
 drivers/gpu/drm/radeon/si.c|  6 ++--
 7 files changed, 68 insertions(+), 48 deletions(-)
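
For orientation, since the radeon.h hunk is cut off near the end of this archive
copy: the ib->vm->ids[ib->ring].id lookups used throughout the hunks below imply
per-ring ID bookkeeping roughly like the following sketch. Field names here are
illustrative, not necessarily the exact upstream layout.

struct radeon_vm_id {
	unsigned		id;		/* hardware VMID this ring currently uses, 0 = none */
	uint64_t		pd_gpu_addr;	/* page directory address last flushed on this ring */
	struct radeon_fence	*last_id_use;	/* last fence that used this VMID on this ring */
};

struct radeon_vm {
	/* one slot per ring instead of a single vm->id; the IB execute paths
	 * below read ib->vm->ids[ib->ring].id where they used to read ib->vm->id */
	struct radeon_vm_id	ids[RADEON_NUM_RINGS];
	/* ... remaining radeon_vm members unchanged ... */
};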

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 6bb8b84..510aeef 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4051,6 +4051,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
struct radeon_ring *ring = &rdev->ring[ib->ring];
+   unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header, control = INDIRECT_BUFFER_VALID;

if (ib->is_const_ib) {
@@ -4079,8 +4080,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
}

-   control |= ib->length_dw |
-   (ib->vm ? (ib->vm->id << 24) : 0);
+   control |= ib->length_dw | (vm_id << 24);

radeon_ring_write(ring, header);
radeon_ring_write(ring,
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 604e2e7..54b9837 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
  struct radeon_ib *ib)
 {
struct radeon_ring *ring = &rdev->ring[ib->ring];
-   u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+   u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;

if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index bee432d..360de9f 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
struct radeon_ring *ring = &rdev->ring[ib->ring];
+   unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;

@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 #endif
  (ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
-   radeon_ring_write(ring, ib->length_dw | 
- (ib->vm ? (ib->vm->id << 24) : 0));
+   radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0);
-   radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
+   radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
 }

 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 5a72404..50f8861 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
 {
struct radeon_ring *ring = &rdev->ring[ib->ring];
+   unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4;
@@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
 */
while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-   radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+   radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index ac4660a..e3e77f7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -901,33 +901,39 @@ struct radeon_vm_pt {
uint6
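
(The rest of the radeon.h hunk, along with the radeon_vm.c and si.c changes, is
cut off in this archive copy.) Assuming the radeon_vm_id layout sketched after
the diffstat above, the per-ring split lets the flush decision in radeon_vm.c be
made against the ring's own slot instead of a single per-VM state. A minimal,
hypothetical illustration; example_vm_needs_flush() is not a function from the
patch:

static bool example_vm_needs_flush(struct radeon_vm *vm, int ring,
				   uint64_t pd_gpu_addr)
{
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	/* Only this ring's slot is consulted: a flush done on another ring
	 * neither satisfies nor invalidates the check for this ring. */
	if (vm_id->pd_gpu_addr != pd_gpu_addr)
		return true;	/* page directory moved since this ring last flushed */

	return false;
}

This is also why the commit message calls the result "still not ideal": rings
working for the same VM each grab their own slot from the limited pool of
hardware VMIDs even in cases where they could share an ID.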
