From: Marek Olšák <marek.ol...@amd.com> Add a second prolog slot (prolog2) to si_shader, placed between the previous stage and the main shader part. It is for a GS prolog in merged ES-GS. --- src/gallium/drivers/radeonsi/si_debug.c | 4 ++++ src/gallium/drivers/radeonsi/si_shader.c | 21 ++++++++++++++++++++- src/gallium/drivers/radeonsi/si_shader.h | 1 + 3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 038c8b4..9634901 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -636,20 +636,24 @@ static void si_print_annotated_shader(struct si_shader *shader, calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); if (shader->prolog) { si_add_split_disasm(shader->prolog->binary.disasm_string, start_addr, &num_inst, instructions); } if (shader->previous_stage) { si_add_split_disasm(shader->previous_stage->binary.disasm_string, start_addr, &num_inst, instructions); } + if (shader->prolog2) { + si_add_split_disasm(shader->prolog2->binary.disasm_string, + start_addr, &num_inst, instructions); + } si_add_split_disasm(shader->binary.disasm_string, start_addr, &num_inst, instructions); if (shader->epilog) { si_add_split_disasm(shader->epilog->binary.disasm_string, start_addr, &num_inst, instructions); } fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", si_get_shader_name(shader, shader->selector->type)); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9e51622..27cd0f2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6341,41 +6341,47 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx, } static unsigned si_get_shader_binary_size(struct si_shader *shader) { unsigned size = shader->binary.code_size; if (shader->prolog) size += shader->prolog->binary.code_size; if (shader->previous_stage) size += shader->previous_stage->binary.code_size; + if (shader->prolog2) + size += shader->prolog2->binary.code_size; if (shader->epilog) size += shader->epilog->binary.code_size; return size; } int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) { const struct ac_shader_binary *prolog = shader->prolog ? 
&shader->prolog->binary : NULL; const struct ac_shader_binary *previous_stage = shader->previous_stage ? &shader->previous_stage->binary : NULL; + const struct ac_shader_binary *prolog2 = + shader->prolog2 ? &shader->prolog2->binary : NULL; const struct ac_shader_binary *epilog = shader->epilog ? &shader->epilog->binary : NULL; const struct ac_shader_binary *mainb = &shader->binary; unsigned bo_size = si_get_shader_binary_size(shader) + (!epilog ? mainb->rodata_size : 0); unsigned char *ptr; assert(!prolog || !prolog->rodata_size); assert(!previous_stage || !previous_stage->rodata_size); - assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size); + assert(!prolog2 || !prolog2->rodata_size); + assert((!prolog && !previous_stage && !prolog2 && !epilog) || + !mainb->rodata_size); assert(!epilog || !epilog->rodata_size); /* GFX9 can fetch at most 128 bytes past the end of the shader. * Prevent VM faults. */ if (sscreen->b.chip_class >= GFX9) bo_size += 128; r600_resource_reference(&shader->bo, NULL); shader->bo = (struct r600_resource*) @@ -6391,20 +6397,24 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) if (prolog) { util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size); ptr += prolog->code_size; } if (previous_stage) { util_memcpy_cpu_to_le32(ptr, previous_stage->code, previous_stage->code_size); ptr += previous_stage->code_size; } + if (prolog2) { + util_memcpy_cpu_to_le32(ptr, prolog2->code, prolog2->code_size); + ptr += prolog2->code_size; + } util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size); ptr += mainb->code_size; if (epilog) util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size); else if (mainb->rodata_size > 0) util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size); sscreen->b.ws->buffer_unmap(shader->bo->buf); @@ -6601,20 +6611,23 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, (r600_can_dump_shader(&sscreen->b, processor) && 
!(sscreen->b.debug_flags & DBG_NO_ASM))) { fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor)); if (shader->prolog) si_shader_dump_disassembly(&shader->prolog->binary, debug, "prolog", file); if (shader->previous_stage) si_shader_dump_disassembly(&shader->previous_stage->binary, debug, "previous stage", file); + if (shader->prolog2) + si_shader_dump_disassembly(&shader->prolog2->binary, + debug, "prolog2", file); si_shader_dump_disassembly(&shader->binary, debug, "main", file); if (shader->epilog) si_shader_dump_disassembly(&shader->epilog->binary, debug, "epilog", file); fprintf(file, "\n"); } si_shader_dump_stats(sscreen, shader, debug, processor, file, @@ -9138,20 +9151,26 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->previous_stage->config.spilled_vgprs); shader->config.private_mem_vgprs = MAX2(shader->config.private_mem_vgprs, shader->previous_stage->config.private_mem_vgprs); shader->config.scratch_bytes_per_wave = MAX2(shader->config.scratch_bytes_per_wave, shader->previous_stage->config.scratch_bytes_per_wave); shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid; } + if (shader->prolog2) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->prolog2->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->prolog2->config.num_vgprs); + } if (shader->epilog) { shader->config.num_sgprs = MAX2(shader->config.num_sgprs, shader->epilog->config.num_sgprs); shader->config.num_vgprs = MAX2(shader->config.num_vgprs, shader->epilog->config.num_vgprs); } } si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 39eee86..76f7743 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -478,20 +478,21 @@ struct si_shader_info { }; struct si_shader { struct 
si_compiler_ctx_state compiler_ctx_state; struct si_shader_selector *selector; struct si_shader *next_variant; struct si_shader_part *prolog; struct si_shader *previous_stage; /* for GFX9 */ + struct si_shader_part *prolog2; struct si_shader_part *epilog; struct si_pm4_state *pm4; struct r600_resource *bo; struct r600_resource *scratch_bo; struct si_shader_key key; struct util_queue_fence optimized_ready; bool compilation_failed; bool is_monolithic; bool is_optimized; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev