Module: Mesa Branch: staging/23.3 Commit: 64741669def3183191bc5b9c4684f42fdeef1040 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64741669def3183191bc5b9c4684f42fdeef1040
Author: Kenneth Graunke <[email protected]> Date: Fri Dec 22 04:48:20 2023 -0800 iris: Skip mi_builder init for non-indirect draws We only need it for indirect draws. Improves performance on an i7-12700 and A770: - Piglit's drawoverhead base case +150.639% +/- 2.86933% (n=15). - gfxbench5 gl_driver2_off +19.7219% +/- 1.13778% (n=15) - SPECviewperf2020 catiav5test1 +1.6831% +/- 0.552052% (n=10). Cc: mesa-stable Reviewed-by: José Roberto de Souza <[email protected]> Reviewed-by: Lionel Landwerlin <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26806> (cherry picked from commit 55c262898ae7188311c89a60e4ec0fbb67b7a95b) --- .pick_status.json | 2 +- src/gallium/drivers/iris/iris_state.c | 178 +++++++++++++++++----------------- 2 files changed, 91 insertions(+), 89 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index f495786d370..0c787e91547 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4,7 +4,7 @@ "description": "iris: Skip mi_builder init for indirect draws", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 33301e548c8..7f7d387db16 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -7869,6 +7869,11 @@ iris_upload_render_state(struct iris_context *ice, #endif } + if (indirect) { + struct mi_builder b; + uint32_t mocs; + mi_builder_init(&b, batch->screen->devinfo, batch); + #define _3DPRIM_END_OFFSET 0x2420 #define _3DPRIM_START_VERTEX 0x2430 #define _3DPRIM_VERTEX_COUNT 0x2434 @@ -7876,103 +7881,100 @@ iris_upload_render_state(struct iris_context *ice, #define _3DPRIM_START_INSTANCE 0x243C #define _3DPRIM_BASE_VERTEX 0x2440 - struct mi_builder b; - uint32_t mocs; - mi_builder_init(&b, batch->screen->devinfo, batch); + if (!indirect->count_from_stream_output) { + if 
(indirect->indirect_draw_count) { + use_predicate = true; + + struct iris_bo *draw_count_bo = + iris_resource_bo(indirect->indirect_draw_count); + unsigned draw_count_offset = + indirect->indirect_draw_count_offset; + mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0); + mi_builder_set_mocs(&b, mocs); + + if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { + /* comparison = draw id < draw count */ + struct mi_value comparison = + mi_ult(&b, mi_imm(drawid_offset), + mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); + + /* predicate = comparison & conditional rendering predicate */ + mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), + mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); + } else { + uint32_t mi_predicate; - if (indirect && !indirect->count_from_stream_output) { - if (indirect->indirect_draw_count) { - use_predicate = true; + /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ + mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset)); + /* Upload the current draw count from the draw parameters buffer + * to MI_PREDICATE_SRC0. Zero the top 32-bits of + * MI_PREDICATE_SRC0. 
+ */ + mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), + mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); + + if (drawid_offset == 0) { + mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL; + } else { + /* While draw_index < draw_count the predicate's result will be + * (draw_index == draw_count) ^ TRUE = TRUE + * When draw_index == draw_count the result is + * (TRUE) ^ TRUE = FALSE + * After this all results will be: + * (FALSE) ^ FALSE = FALSE + */ + mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_XOR | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL; + } + iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); + } + } + struct iris_bo *bo = iris_resource_bo(indirect->buffer); + assert(bo); - struct iris_bo *draw_count_bo = - iris_resource_bo(indirect->indirect_draw_count); - unsigned draw_count_offset = - indirect->indirect_draw_count_offset; - mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0); + mocs = iris_mocs(bo, &batch->screen->isl_dev, 0); mi_builder_set_mocs(&b, mocs); - if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) { - /* comparison = draw id < draw count */ - struct mi_value comparison = - mi_ult(&b, mi_imm(drawid_offset), - mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); - - /* predicate = comparison & conditional rendering predicate */ - mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), - mi_iand(&b, comparison, mi_reg32(CS_GPR(15)))); + mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), + mi_mem32(ro_bo(bo, indirect->offset + 0))); + mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), + mi_mem32(ro_bo(bo, indirect->offset + 4))); + mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), + mi_mem32(ro_bo(bo, indirect->offset + 8))); + if (draw->index_size) { + mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), + mi_mem32(ro_bo(bo, indirect->offset + 12))); + mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), + mi_mem32(ro_bo(bo, indirect->offset + 16))); } else { - 
uint32_t mi_predicate; + mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), + mi_mem32(ro_bo(bo, indirect->offset + 12))); + mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0)); + } + } else if (indirect->count_from_stream_output) { + struct iris_stream_output_target *so = + (void *) indirect->count_from_stream_output; + struct iris_bo *so_bo = iris_resource_bo(so->offset.res); - /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ - mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset)); - /* Upload the current draw count from the draw parameters buffer - * to MI_PREDICATE_SRC0. Zero the top 32-bits of - * MI_PREDICATE_SRC0. - */ - mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), - mi_mem32(ro_bo(draw_count_bo, draw_count_offset))); + mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0); + mi_builder_set_mocs(&b, mocs); - if (drawid_offset == 0) { - mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL; - } else { - /* While draw_index < draw_count the predicate's result will be - * (draw_index == draw_count) ^ TRUE = TRUE - * When draw_index == draw_count the result is - * (TRUE) ^ TRUE = FALSE - * After this all results will be: - * (FALSE) ^ FALSE = FALSE - */ - mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_XOR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL; - } - iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); - } - } - struct iris_bo *bo = iris_resource_bo(indirect->buffer); - assert(bo); - - mocs = iris_mocs(bo, &batch->screen->isl_dev, 0); - mi_builder_set_mocs(&b, mocs); - - mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), - mi_mem32(ro_bo(bo, indirect->offset + 0))); - mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), - mi_mem32(ro_bo(bo, indirect->offset + 4))); - mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), - mi_mem32(ro_bo(bo, indirect->offset + 8))); - if (draw->index_size) { - mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), - 
mi_mem32(ro_bo(bo, indirect->offset + 12))); - mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), - mi_mem32(ro_bo(bo, indirect->offset + 16))); - } else { - mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), - mi_mem32(ro_bo(bo, indirect->offset + 12))); + iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ); + + struct iris_address addr = ro_bo(so_bo, so->offset.offset); + struct mi_value offset = + mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset); + mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), + mi_udiv32_imm(&b, offset, so->stride)); + mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0)); mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0)); + mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0)); + mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), + mi_imm(draw->instance_count)); } - } else if (indirect && indirect->count_from_stream_output) { - struct iris_stream_output_target *so = - (void *) indirect->count_from_stream_output; - struct iris_bo *so_bo = iris_resource_bo(so->offset.res); - - mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0); - mi_builder_set_mocs(&b, mocs); - - iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ); - - struct iris_address addr = ro_bo(so_bo, so->offset.offset); - struct mi_value offset = - mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset); - mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT), - mi_udiv32_imm(&b, offset, so->stride)); - mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0)); - mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0)); - mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0)); - mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT), - mi_imm(draw->instance_count)); } iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);
