[PATCH 07/11] drm/radeon: use one VMID for each ring
From: Christian König Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring, still not ideal cause in a lot of cases rings can share IDs. Signed-off-by: Christian König --- drivers/gpu/drm/radeon/cik.c | 4 +-- drivers/gpu/drm/radeon/cik_sdma.c | 2 +- drivers/gpu/drm/radeon/ni.c| 6 ++-- drivers/gpu/drm/radeon/ni_dma.c| 3 +- drivers/gpu/drm/radeon/radeon.h| 36 +-- drivers/gpu/drm/radeon/radeon_vm.c | 59 +++--- drivers/gpu/drm/radeon/si.c| 6 ++-- 7 files changed, 68 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6bb8b84..510aeef 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4051,6 +4051,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header, control = INDIRECT_BUFFER_VALID; if (ib->is_const_ib) { @@ -4079,8 +4080,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); } - control |= ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0); + control |= ib->length_dw | (vm_id << 24); radeon_ring_write(ring, header); radeon_ring_write(ring, diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 604e2e7..54b9837 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index bee432d..360de9f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA; @@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24)); /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0x); radeon_ring_write(ring, 0); - radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ + radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */ } static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 5a72404..50f8861 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; @@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ac4660a..e3e77f7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -901,33 +901,39 @@ struct radeon_vm_pt { uint6
[PATCH 07/11] drm/radeon: use one VMID for each ring
From: Christian K?nig Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring, still not ideal cause in a lot of cases rings can share IDs. Signed-off-by: Christian K?nig --- drivers/gpu/drm/radeon/cik.c | 4 +-- drivers/gpu/drm/radeon/cik_sdma.c | 2 +- drivers/gpu/drm/radeon/ni.c| 6 ++-- drivers/gpu/drm/radeon/ni_dma.c| 3 +- drivers/gpu/drm/radeon/radeon.h| 36 +-- drivers/gpu/drm/radeon/radeon_vm.c | 59 +++--- drivers/gpu/drm/radeon/si.c| 6 ++-- 7 files changed, 68 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 203e895..1dc4e4d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4042,6 +4042,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header, control = INDIRECT_BUFFER_VALID; if (ib->is_const_ib) { @@ -4070,8 +4071,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); } - control |= ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0); + control |= ib->length_dw | (vm_id << 24); radeon_ring_write(ring, header); radeon_ring_write(ring, diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1216a3c..4ebcd47 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index bee432d..360de9f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA; @@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24)); /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0x); radeon_ring_write(ring, 0); - radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ + radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */ } static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 1d15f6b..efe98b1 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; @@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c2873d4..2813a60 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -900,33 +900,39 @@ struct radeon_vm_pt { uint64_
[PATCH 07/11] drm/radeon: use one VMID for each ring
From: Christian K?nig Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring, still not ideal cause in a lot of cases rings can share IDs. Signed-off-by: Christian K?nig --- drivers/gpu/drm/radeon/cik.c | 4 +-- drivers/gpu/drm/radeon/cik_sdma.c | 2 +- drivers/gpu/drm/radeon/ni.c| 6 ++-- drivers/gpu/drm/radeon/ni_dma.c| 3 +- drivers/gpu/drm/radeon/radeon.h| 36 +-- drivers/gpu/drm/radeon/radeon_vm.c | 59 +++--- drivers/gpu/drm/radeon/si.c| 6 ++-- 7 files changed, 68 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dd55ea..fae5a8c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4042,6 +4042,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header, control = INDIRECT_BUFFER_VALID; if (ib->is_const_ib) { @@ -4070,8 +4071,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); } - control |= ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0); + control |= ib->length_dw | (vm_id << 24); radeon_ring_write(ring, header); radeon_ring_write(ring, diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 06602e4..1374ecf 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 7f451aa..aca3e91 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1366,6 +1366,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA; @@ -1388,15 +1389,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24)); /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0x); radeon_ring_write(ring, 0); - radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ + radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */ } static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 1d15f6b..efe98b1 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; @@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fddb19e..bee8934 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -897,33 +897,39 @@ struct radeon_vm_pt { uint64_