From: Marek Olšák <marek.ol...@amd.com> Move the CP DMA buffer copy into si_cp_dma_copy_buffer and keep si_copy_buffer as a wrapper around it; compute and SDMA will be added into si_copy_buffer. --- src/gallium/drivers/radeonsi/si_blit.c | 2 +- src/gallium/drivers/radeonsi/si_cp_dma.c | 33 ++++++++++++------- src/gallium/drivers/radeonsi/si_pipe.c | 3 +- src/gallium/drivers/radeonsi/si_pipe.h | 8 +++-- .../drivers/radeonsi/si_test_dma_perf.c | 4 +-- 5 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index fcaff80125c..8f7aa0815b9 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -903,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx, struct si_context *sctx = (struct si_context *)ctx; struct si_texture *ssrc = (struct si_texture*)src; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0; unsigned dst_width0, dst_height0, src_force_level = 0; struct pipe_box sbox, dstbox; /* Handle buffers first. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0, -1); + si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); return; } assert(u_max_sample(dst) == u_max_sample(src)); /* The driver doesn't decompress resources automatically while * u_blitter is rendering. */ si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level, src_box->z, src_box->z + src_box->depth - 1); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index ad53682b1b2..e85bb9b1acf 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -426,36 +426,32 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, va = sctx->scratch_buffer->gpu_address; si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, cache_policy); } /** * Do memcpy between buffers using CP DMA. 
* * \param user_flags bitmask of SI_CPDMA_* */ -void si_copy_buffer(struct si_context *sctx, - struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size, - unsigned user_flags, enum si_cache_policy cache_policy) +void si_cp_dma_copy_buffer(struct si_context *sctx, + struct pipe_resource *dst, struct pipe_resource *src, + uint64_t dst_offset, uint64_t src_offset, unsigned size, + unsigned user_flags, enum si_coherency coher, + enum si_cache_policy cache_policy) { uint64_t main_dst_offset, main_src_offset; unsigned skipped_size = 0; unsigned realign_size = 0; - enum si_coherency coher = SI_COHERENCY_SHADER; bool is_first = true; - if (!size) - return; - - if (cache_policy == -1) - cache_policy = get_cache_policy(sctx, coher); + assert(size); if (dst != src || dst_offset != src_offset) { /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); } dst_offset += r600_resource(dst)->gpu_address; @@ -520,35 +516,50 @@ void si_copy_buffer(struct si_context *sctx, si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size, dma_flags, cache_policy); } /* Finally, realign the engine if the size wasn't aligned. */ if (realign_size) { si_cp_dma_realign_engine(sctx, realign_size, user_flags, coher, cache_policy, &is_first); } +} + +void si_copy_buffer(struct si_context *sctx, + struct pipe_resource *dst, struct pipe_resource *src, + uint64_t dst_offset, uint64_t src_offset, unsigned size) +{ + enum si_coherency coher = SI_COHERENCY_SHADER; + enum si_cache_policy cache_policy = get_cache_policy(sctx, coher); + + if (!size) + return; + + si_cp_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, size, + 0, coher, cache_policy); if (cache_policy != L2_BYPASS) r600_resource(dst)->TC_L2_dirty = true; /* If it's not a prefetch... 
*/ if (dst_offset != src_offset) sctx->num_cp_dma_calls++; } void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size) { assert(sctx->chip_class >= CIK); - si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, L2_LRU); + si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, + SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU); } static void cik_prefetch_shader_async(struct si_context *sctx, struct si_pm4_state *state) { struct pipe_resource *bo = &state->bo[0]->b.b; assert(state->nbo == 1); cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index bce7b3f550e..8ef29c25df2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -723,21 +723,22 @@ static void si_test_vmfault(struct si_screen *sscreen) pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64); if (!buf) { puts("Buffer allocation failed."); exit(1); } r600_resource(buf)->gpu_address = 0; /* cause a VM fault */ if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { - si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, -1); + si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, + SI_COHERENCY_NONE, L2_BYPASS); ctx->flush(ctx, NULL, 0); puts("VM fault test: CP - done."); } if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) { si_sdma_clear_buffer(sctx, buf, 0, 4, 0); ctx->flush(ctx, NULL, 0); puts("VM fault test: SDMA - done."); } if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) { util_test_constant_buffer(ctx, buf); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a6f09b65f74..29d7e555a0c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1120,24 +1120,28 @@ enum si_coherency { }; void si_cp_dma_wait_for_idle(struct si_context *sctx); void si_cp_dma_clear_buffer(struct si_context *sctx, struct 
pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, enum si_coherency coher, enum si_cache_policy cache_policy); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, enum si_coherency coher); +void si_cp_dma_copy_buffer(struct si_context *sctx, + struct pipe_resource *dst, struct pipe_resource *src, + uint64_t dst_offset, uint64_t src_offset, unsigned size, + unsigned user_flags, enum si_coherency coher, + enum si_cache_policy cache_policy); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size, - unsigned user_flags, enum si_cache_policy cache_policy); + uint64_t dst_offset, uint64_t src_offset, unsigned size); void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only); void si_init_cp_dma_functions(struct si_context *sctx); /* si_debug.c */ void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, bool get_buffer_list); void si_clear_saved_cs(struct radeon_saved_cs *saved); void si_destroy_saved_cs(struct si_saved_cs *scs); diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index f097a642999..6c04720e963 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -171,22 +171,22 @@ void si_test_dma_perf(struct si_screen *sscreen) src = is_copy ? pipe_buffer_create(screen, 0, src_usage, size) : NULL; /* Run tests. 
*/ for (unsigned iter = 0; iter < NUM_RUNS; iter++) { q[iter] = ctx->create_query(ctx, query_type, 0); ctx->begin_query(ctx, q[iter]); if (test_cp) { /* CP DMA */ if (is_copy) { - si_copy_buffer(sctx, dst, src, 0, 0, size, 0, - cache_policy); + si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0, + SI_COHERENCY_NONE, cache_policy); } else { si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value, SI_COHERENCY_NONE, cache_policy); } } else if (test_sdma) { /* SDMA */ if (is_copy) { struct pipe_box box; u_box_1d(0, size, &box); sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev