Mesa (master): i965: Make a helper for finding an existing shader variant.
Module: Mesa Branch: master Commit: f9edc550b2bb76f77e33b6cb122a91f266bc5958 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9edc550b2bb76f77e33b6cb122a91f266bc5958 Author: Kenneth GraunkeDate: Fri Nov 11 13:31:06 2016 -0800 i965: Make a helper for finding an existing shader variant. We had five copies of the same "walk the cache and look for an existing shader variant for this program" code. Now we have one helper function that returns the key. Signed-off-by: Kenneth Graunke Reviewed-by: Eduardo Lima Mitev --- src/mesa/drivers/dri/i965/brw_gs.c| 22 src/mesa/drivers/dri/i965/brw_program_cache.c | 38 +++ src/mesa/drivers/dri/i965/brw_state.h | 5 src/mesa/drivers/dri/i965/brw_tcs.c | 22 src/mesa/drivers/dri/i965/brw_tes.c | 22 src/mesa/drivers/dri/i965/brw_vs.c| 22 src/mesa/drivers/dri/i965/brw_wm.c| 22 7 files changed, 68 insertions(+), 85 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b7fb9f9..2996203 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -40,26 +40,14 @@ static void brw_gs_debug_recompile(struct brw_context *brw, struct gl_program *prog, const struct brw_gs_prog_key *key) { - struct brw_cache_item *c = NULL; - const struct brw_gs_prog_key *old_key = NULL; - bool found = false; - perf_debug("Recompiling geometry shader for program %d\n", prog->Id); - for (unsigned int i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = c->next) { - if (c->cache_id == BRW_CACHE_GS_PROG) { -old_key = c->key; - -if (old_key->program_string_id == key->program_string_id) - break; - } - } - if (c) - break; - } + bool found = false; + const struct brw_gs_prog_key *old_key = + brw_find_previous_compile(>cache, BRW_CACHE_GS_PROG, +key->program_string_id); - if (!c) { + if (!old_key) { perf_debug(" Didn't find previous compile in the shader cache for " "debug\n"); return; diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c index 3947904a..3d95372 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -55,6 +55,27 @@ #define FILE_DEBUG_FLAG DEBUG_STATE +static unsigned +get_program_string_id(enum brw_cache_id cache_id, const void *key) +{ + switch (cache_id) { + case BRW_CACHE_VS_PROG: + return ((struct brw_vs_prog_key *) key)->program_string_id; + case BRW_CACHE_TCS_PROG: + return ((struct brw_tcs_prog_key *) key)->program_string_id; + case BRW_CACHE_TES_PROG: + return ((struct brw_tes_prog_key *) key)->program_string_id; + case BRW_CACHE_GS_PROG: + return ((struct brw_gs_prog_key *) key)->program_string_id; + case BRW_CACHE_CS_PROG: + return ((struct brw_cs_prog_key *) key)->program_string_id; + case BRW_CACHE_FS_PROG: + return ((struct brw_wm_prog_key *) key)->program_string_id; + default: + unreachable("no program string id for this kind of program"); + } +} + static GLuint hash_key(struct brw_cache_item *item) { @@ -268,6 +289,23 @@ brw_alloc_item_data(struct brw_cache *cache, uint32_t size) return offset; } +const void * +brw_find_previous_compile(struct brw_cache *cache, + enum brw_cache_id cache_id, + unsigned program_string_id) +{ + for (unsigned i = 0; i < cache->size; i++) { + for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) { + if (c->cache_id == cache_id && + get_program_string_id(cache_id, c->key) == program_string_id) { +return c->key; + } + } + } + + return NULL; +} + void brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 176557b..bd82212 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -235,6 +235,11 @@ bool brw_search_cache(struct brw_cache *cache, const void *key, GLuint key_size, uint32_t *inout_offset, void *inout_aux); + +const void *brw_find_previous_compile(struct brw_cache 
*cache, + enum brw_cache_id cache_id, + unsigned program_string_id); + void brw_program_cache_check_size(struct brw_context *brw); void brw_init_caches( struct brw_context *brw ); diff --git
Mesa (master): i965: Move program cache printing to brw_program_cache.c.
Module: Mesa Branch: master Commit: ce892392948e18241a872878873dbdd46e546fb2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ce892392948e18241a872878873dbdd46e546fb2 Author: Kenneth GraunkeDate: Fri Nov 11 14:47:53 2016 -0800 i965: Move program cache printing to brw_program_cache.c. It makes sense to put a function which prints out the entire contents of the program cache in the file that implements the program cache. Signed-off-by: Kenneth Graunke Reviewed-by: Eduardo Lima Mitev --- src/mesa/drivers/dri/i965/brw_program_cache.c | 46 + src/mesa/drivers/dri/i965/brw_state.h | 2 + src/mesa/drivers/dri/i965/brw_state_dump.c| 58 +-- 3 files changed, 49 insertions(+), 57 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index 3d95372..8939fb1 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -480,3 +480,49 @@ brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, >cache); } + +static const char * +cache_name(enum brw_cache_id cache_id) +{ + switch (cache_id) { + case BRW_CACHE_VS_PROG: + return "VS kernel"; + case BRW_CACHE_TCS_PROG: + return "TCS kernel"; + case BRW_CACHE_TES_PROG: + return "TES kernel"; + case BRW_CACHE_FF_GS_PROG: + return "Fixed-function GS kernel"; + case BRW_CACHE_GS_PROG: + return "GS kernel"; + case BRW_CACHE_CLIP_PROG: + return "CLIP kernel"; + case BRW_CACHE_SF_PROG: + return "SF kernel"; + case BRW_CACHE_FS_PROG: + return "FS kernel"; + case BRW_CACHE_CS_PROG: + return "CS kernel"; + default: + return "unknown"; + } +} + +void +brw_print_program_cache(struct brw_context *brw) +{ + const struct brw_cache *cache = >cache; + struct brw_cache_item *item; + + drm_intel_bo_map(cache->bo, false); + + for (unsigned i = 0; i < cache->size; i++) { + for (item = cache->items[i]; item; item = item->next) { + fprintf(stderr, "%s:\n", cache_name(i)); + brw_disassemble(>screen->devinfo, cache->bo->virtual, 
+ item->offset, item->size, stderr); + } + } + + drm_intel_bo_unmap(cache->bo); +} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index bd82212..f2349d8 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -245,6 +245,8 @@ void brw_program_cache_check_size(struct brw_context *brw); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +void brw_print_program_cache(struct brw_context *brw); + /*** * brw_state_batch.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 1ed8aaa..13e76ec 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -718,62 +718,6 @@ static void dump_binding_table(struct brw_context *brw, uint32_t offset, } static void -dump_prog_cache(struct brw_context *brw) -{ - struct brw_cache *cache = >cache; - unsigned int b; - - drm_intel_bo_map(brw->cache.bo, false); - - for (b = 0; b < cache->size; b++) { - struct brw_cache_item *item; - - for (item = cache->items[b]; item; item = item->next) { -const char *name; - -switch (item->cache_id) { -case BRW_CACHE_VS_PROG: - name = "VS kernel"; - break; - case BRW_CACHE_TCS_PROG: -name = "TCS kernel"; -break; - case BRW_CACHE_TES_PROG: -name = "TES kernel"; -break; -case BRW_CACHE_FF_GS_PROG: - name = "Fixed-function GS kernel"; - break; - case BRW_CACHE_GS_PROG: -name = "GS kernel"; -break; -case BRW_CACHE_CLIP_PROG: - name = "CLIP kernel"; - break; -case BRW_CACHE_SF_PROG: - name = "SF kernel"; - break; -case BRW_CACHE_FS_PROG: - name = "FS kernel"; - break; - case BRW_CACHE_CS_PROG: -name = "CS kernel"; -break; -default: - name = "unknown"; - break; -} - - fprintf(stderr, "%s:\n", name); - brw_disassemble(>screen->devinfo, brw->cache.bo->virtual, - item->offset, item->size, stderr); - } - } - - drm_intel_bo_unmap(brw->cache.bo); -} - -static void 
dump_state_batch(struct brw_context *brw) { int i; @@ -880,5 +824,5 @@ void brw_debug_batch(struct brw_context *brw) drm_intel_bo_unmap(brw->batch.bo); if (0) - dump_prog_cache(brw); + brw_print_program_cache(brw); }
Mesa (master): i965: Don't map/unmap in brw_print_program_cache on LLC platforms.
Module: Mesa Branch: master Commit: aa291c3ba9b1062d219276cef12b1b7c5380b423 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa291c3ba9b1062d219276cef12b1b7c5380b423 Author: Kenneth Graunke Date: Fri Nov 11 18:05:14 2016 -0800 i965: Don't map/unmap in brw_print_program_cache on LLC platforms. We have a persistent mapping. Don't map it a second time or try to unmap it. Just use the pointer. This most likely would wreak havoc except that this code is unused (it's only called from an if (0) debug block). Signed-off-by: Kenneth Graunke Reviewed-by: Eduardo Lima Mitev --- src/mesa/drivers/dri/i965/brw_program_cache.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index 8939fb1..44d9994 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -514,7 +514,8 @@ brw_print_program_cache(struct brw_context *brw) const struct brw_cache *cache = &brw->cache; struct brw_cache_item *item; - drm_intel_bo_map(cache->bo, false); + if (!brw->has_llc) + drm_intel_bo_map(cache->bo, false); for (unsigned i = 0; i < cache->size; i++) { for (item = cache->items[i]; item; item = item->next) { @@ -524,5 +525,6 @@ brw_print_program_cache(struct brw_context *brw) } } - drm_intel_bo_unmap(cache->bo); + if (!brw->has_llc) + drm_intel_bo_unmap(cache->bo); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Make copy propagation not panic when it sees an intrinsic.
Module: Mesa Branch: master Commit: e7d4008ebfe561ee0aa3df6cdcfd39a8842ed659 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7d4008ebfe561ee0aa3df6cdcfd39a8842ed659 Author: Kenneth GraunkeDate: Fri Dec 9 19:06:06 2016 -0800 glsl: Make copy propagation not panic when it sees an intrinsic. A number of games have large arrays of constants, which we promote to uniforms. This introduces copies from the uniform array to the original temporary array. Normally, copy propagation eliminates those copies, making everything refer to the uniform array directly. A number of shaders in "Deus Ex: Mankind Divided" recently exposed a limitation of copy propagation - if we had any intrinsics (i.e. image access in a compute shader), we weren't able to get rid of these copies. That meant that any variable indexing remained on the temporary array rather being moved to the uniform array. i965's scalar backend currently doesn't support indirect addressing of temporary arrays, which meant lowering it to if-ladders. This was horrible. According to Marek, on radeonsi/GCN, "F1 2015" uses 64% less spilled-temp-array memory. On i965/Skylake: total instructions in shared programs: 13362954 -> 13329878 (-0.25%) instructions in affected programs: 43745 -> 10669 (-75.61%) helped: 12 HURT: 0 total cycles in shared programs: 248081010 -> 245949178 (-0.86%) cycles in affected programs: 4597930 -> 2466098 (-46.37%) helped: 12 HURT: 0 total spills in shared programs: 9493 -> 9507 (0.15%) spills in affected programs: 25 -> 39 (56.00%) helped: 0 HURT: 1 total fills in shared programs: 12127 -> 12197 (0.58%) fills in affected programs: 110 -> 180 (63.64%) helped: 0 HURT: 1 Helps Deus Ex: Mankind Divided. The one shader with hurt spills/fills is from Tomb Raider at Ultra settings, but that same shader has a -39.55% reduction in instructions and -14.09% reduction in cycle counts, so it seems like a win there as well. 
Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri Reviewed-by: Matt Turner --- src/compiler/glsl/opt_copy_propagation.cpp | 31 ++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp index 247c498..2240421 100644 --- a/src/compiler/glsl/opt_copy_propagation.cpp +++ b/src/compiler/glsl/opt_copy_propagation.cpp @@ -186,11 +186,34 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir) } } - /* Since we're unlinked, we don't (necessarily) know the side effects of -* this call. So kill all copies. + /* Since this pass can run when unlinked, we don't (necessarily) know +* the side effects of calls. (When linked, most calls are inlined +* anyway, so it doesn't matter much.) +* +* One place where this does matter is IR intrinsics. They're never +* inlined. We also know what they do - while some have side effects +* (such as image writes), none edit random global variables. So we +* can assume they're side-effect free (other than the return value +* and out parameters). */ - _mesa_hash_table_clear(acp, NULL); - this->killed_all = true; + if (!ir->callee->is_intrinsic()) { + _mesa_hash_table_clear(acp, NULL); + this->killed_all = true; + } else { + if (ir->return_deref) + kill(ir->return_deref->var); + + foreach_two_lists(formal_node, >callee->parameters, +actual_node, >actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + if (sig_param->data.mode == ir_var_function_out || + sig_param->data.mode == ir_var_function_inout) { +ir_rvalue *ir = (ir_rvalue *) actual_node; +ir_variable *var = ir->variable_referenced(); +kill(var); + } + } + } return visit_continue_with_parent; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Make DCE set null destinations on messages with side effects.
Module: Mesa Branch: master Commit: 9919542f1cfff70524bc6117d19bf88e59159caa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9919542f1cfff70524bc6117d19bf88e59159caa Author: Kenneth GraunkeDate: Sat Jan 14 23:32:12 2017 -0800 i965: Make DCE set null destinations on messages with side effects. (Co-authored by Matt Turner.) Image atomics, for example, return a value - but the shader may not want to use it. We assigned a useless VGRF destination. This seemed harmless, but it can actually be quite harmful. The register allocator has to assign that VGRF to a real register. It may assign the same actual GRF to the destination of an instruction that follows soon after. This results in a write-after-write (WAW) dependency, and stall. A number of "Deus Ex: Mankind Divided" shaders use image atomics, but don't use the return value. Several of these were hitting WAW stalls for nearly 14,000 (poorly estimated) cycles a pop. Making dead code elimination null out the destination avoids this issue. This patch cuts one shader's estimated cycles by -98.39%! Removing the message response should also help with data cluster bandwidth. On Skylake: (instruction counts remain identical) total cycles in shared programs: 255413890 -> 248081010 (-2.87%) cycles in affected programs: 12019948 -> 4687068 (-61.01%) helped: 24 HURT: 10 v2: Make can_omit_write independent of can_eliminate (Curro). Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner --- .../dri/i965/brw_fs_dead_code_eliminate.cpp| 54 -- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 0dd6091..7adb427 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -34,6 +34,42 @@ * yet in the tail end of this block. */ +/** + * Is it safe to eliminate the instruction? 
+ */ +static bool +can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live) +{ +return !inst->is_control_flow() && + !inst->has_side_effects() && + !(flag_live[0] & inst->flags_written()) && + !inst->writes_accumulator; +} + +/** + * Is it safe to omit the write, making the destination ARF null? + */ +static bool +can_omit_write(const fs_inst *inst) +{ + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return true; + default: + /* We can eliminate the destination write for ordinary instructions, + * but not most SENDs. + */ + if (inst->opcode < 128 && inst->mlen == 0) + return true; + + /* It might not be safe for other virtual opcodes. */ + return false; + } +} + bool fs_visitor::dead_code_eliminate() { @@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { - if (inst->dst.file == VGRF && !inst->has_side_effects()) { + if (inst->dst.file == VGRF) { const unsigned var = live_intervals->var_from_reg(inst->dst); bool result_live = false; for (unsigned i = 0; i < regs_written(inst); i++) result_live |= BITSET_TEST(live, var + i); -if (!result_live) { +if (!result_live && +(can_omit_write(inst) || can_eliminate(inst, flag_live))) { + inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); progress = true; - - if (inst->writes_accumulator || inst->flags_written()) { - inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - } } } - if (inst->dst.is_null() && - !inst->is_control_flow() && - !inst->has_side_effects() && - !(flag_live[0] & inst->flags_written()) && - !inst->writes_accumulator) { + if (inst->dst.is_null() && can_eliminate(inst, flag_live)) { inst->opcode = BRW_OPCODE_NOP; progress = true; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Make DCE explicitly not eliminate any control flow instructions.
Module: Mesa Branch: master Commit: be5f53e769deb936509efd6f0576b15b7a5432b9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=be5f53e769deb936509efd6f0576b15b7a5432b9 Author: Kenneth Graunke Date: Tue Jan 17 19:15:50 2017 -0800 i965: Make DCE explicitly not eliminate any control flow instructions. According to Matt, the dead code pass explicitly avoided IF and WHILE because on Sandybridge, these could have conditional modifiers and null destination registers. Normally, those instructions use BAD_FILE for the destination register. Nowadays, we don't do that anymore, so we could technically drop these checks. However, it's clearer to explicitly leave control flow instructions alone, so change it to the more generic !inst->is_control_flow(). This should have no actual change. [This patch implements review feedback from Curro and Matt.] Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 8a0469a..04901a9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -77,9 +77,8 @@ fs_visitor::dead_code_eliminate() } } - if ((inst->opcode != BRW_OPCODE_IF && - inst->opcode != BRW_OPCODE_WHILE) && - inst->dst.is_null() && + if (inst->dst.is_null() && + !inst->is_control_flow() && !inst->has_side_effects() && !inst->flags_written() && !inst->writes_accumulator) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Combine some dead code elimination NOP'ing code.
Module: Mesa Branch: master Commit: 90bf39cd2b39874557a7c492d92b85945d45f3c6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=90bf39cd2b39874557a7c492d92b85945d45f3c6 Author: Kenneth GraunkeDate: Wed Dec 14 17:56:35 2016 -0800 i965: Combine some dead code elimination NOP'ing code. In theory we might have incorrectly NOP'd instructions that write the flag, but where that flag value isn't used, and yet the instruction either writes the accumulator or has side effects. I don't believe any such instructions exist, so this is mostly a code cleanup. Curro pointed out that FS_OPCODE_FB_WRITE has a null destination and actually writes the flag on Gen4-5 to dynamically decide whether to write some payload data. The hunk removed in this patch might have NOP'd it, except that we don't actually mark flags_written() in the IR, so it doesn't think the flag is touched at all. That's sketchy, but it means it wouldn't hit this today (though there are likely other problems!). v2: Properly replace the inst->regs_written() check in the second hunk with the flag being live (mistake caught by Curro). 
Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 9 + 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 04901a9..0dd6091 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -70,17 +70,10 @@ fs_visitor::dead_code_eliminate() } } - if (inst->dst.is_null() && inst->flags_written()) { -if (!(flag_live[0] & inst->flags_written())) { - inst->opcode = BRW_OPCODE_NOP; - progress = true; -} - } - if (inst->dst.is_null() && !inst->is_control_flow() && !inst->has_side_effects() && - !inst->flags_written() && + !(flag_live[0] & inst->flags_written()) && !inst->writes_accumulator) { inst->opcode = BRW_OPCODE_NOP; progress = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: disable vertex reuse when writing viewport index
Module: Mesa Branch: master Commit: aac562f112ea9194b416c97336dcbbd3c1da812b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aac562f112ea9194b416c97336dcbbd3c1da812b Author: Dave Airlie Date: Wed Jan 18 06:26:31 2017 +1000 radv: disable vertex reuse when writing viewport index This fixes some issues we'd hit later if using viewport indexes. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 27fa405..c6f238b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -511,6 +511,8 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, cull_dist_mask << 8 | clip_dist_mask); + radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF, + S_028AB4_REUSE_OFF(vs->info.vs.writes_viewport_index)); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: switch an if to switch
Module: Mesa Branch: master Commit: 5dadd7ca27da6cd5bbac95c8e09130ec4a384e2b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5dadd7ca27da6cd5bbac95c8e09130ec4a384e2b Author: Dave Airlie Date: Tue Jan 17 08:38:14 2017 +1000 radv/ac: switch an if to switch makes it easier to add other shader stages. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Edward O'Callaghan Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 3173aa5..6d98fde 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4626,11 +4626,16 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, /* +3 for scratch wave offset and VCC */ config->num_sgprs = MAX2(config->num_sgprs, shader_info->num_input_sgprs + 3); - if (nir->stage == MESA_SHADER_COMPUTE) { + + switch (nir->stage) { + case MESA_SHADER_COMPUTE: for (int i = 0; i < 3; ++i) shader_info->cs.block_size[i] = nir->info->cs.local_size[i]; - } - - if (nir->stage == MESA_SHADER_FRAGMENT) + break; + case MESA_SHADER_FRAGMENT: shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests; + break; + default: + break; + } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: add support for writing layer/viewport index (v2)
Module: Mesa Branch: master Commit: 6b635bbe16c93ad13afa3390d20c2f0f033e065d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b635bbe16c93ad13afa3390d20c2f0f033e065d Author: Dave AirlieDate: Tue Jan 17 07:04:52 2017 +1000 radv: add support for writing layer/viewport index (v2) This just adds the infrastructure to allow writing layer and viewport index. It's just a first patch out of the geom shader tree, and doesn't do much on its own. v2: add missing if statement change (Bas) Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 21 ++--- src/amd/common/ac_nir_to_llvm.h | 2 ++ src/amd/vulkan/radv_cmd_buffer.c | 6 +- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 798ddca..3173aa5 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4107,7 +4107,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) unsigned pos_idx, num_pos_exports = 0; LLVMValueRef args[9]; LLVMValueRef pos_args[4][9] = { { 0 } }; - LLVMValueRef psize_value = 0; + LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; int i; const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) | (1ull << VARYING_SLOT_CLIP_DIST1) | @@ -4167,6 +4167,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) ctx->shader_info->vs.writes_pointsize = true; psize_value = values[0]; continue; + } else if (i == VARYING_SLOT_LAYER) { + ctx->shader_info->vs.writes_layer = true; + layer_value = values[0]; + continue; + } else if (i == VARYING_SLOT_VIEWPORT) { + ctx->shader_info->vs.writes_viewport_index = true; + viewport_index_value = values[0]; + continue; } else if (i >= VARYING_SLOT_VAR0) { ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0); target = V_008DFC_SQ_EXP_PARAM + param_count; @@ -4200,8 +4208,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) pos_args[0][8] 
= ctx->f32one; /* W */ } - if (ctx->shader_info->vs.writes_pointsize == true) { - pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */ + uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) | +(ctx->shader_info->vs.writes_layer == true ? 4 : 0) | +(ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0)); + if (mask) { + pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */ pos_args[1][1] = ctx->i32zero; /* EXEC mask */ pos_args[1][2] = ctx->i32zero; /* last export? */ pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false); @@ -4213,6 +4224,10 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) if (ctx->shader_info->vs.writes_pointsize == true) pos_args[1][5] = psize_value; + if (ctx->shader_info->vs.writes_layer == true) + pos_args[1][7] = layer_value; + if (ctx->shader_info->vs.writes_viewport_index == true) + pos_args[1][8] = viewport_index_value; } for (i = 0; i < 4; i++) { if (pos_args[i][0]) diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index f488c09..a57558e 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -95,6 +95,8 @@ struct ac_shader_variant_info { unsigned vgpr_comp_cnt; uint32_t export_mask; bool writes_pointsize; + bool writes_layer; + bool writes_viewport_index; uint8_t clip_dist_mask; uint8_t cull_dist_mask; } vs; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 01e77f8..27fa405 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -500,7 +500,11 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(vs->info.vs.writes_pointsize) | - S_02881C_VS_OUT_MISC_VEC_ENA(vs->info.vs.writes_pointsize) | +
Mesa (master): radv: add support for layered clears (v2)
Module: Mesa Branch: master Commit: 7e0382fb35960416459134f27fa1b0f57aba8acc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7e0382fb35960416459134f27fa1b0f57aba8acc Author: Dave AirlieDate: Tue Jan 17 10:05:07 2017 +1000 radv: add support for layered clears (v2) Just always use the layer clear pipelines, the overhead of emitting the layer shouldn't be too large. v2: Bas suggested we always use it. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_meta_clear.c | 28 ++-- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index ff779ea..957b388 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -98,6 +98,16 @@ build_color_shaders(struct nir_shader **out_vs, nir_copy_var(_b, vs_out_color, vs_in_color); nir_copy_var(_b, fs_out_color, fs_in_color); + const struct glsl_type *layer_type = glsl_int_type(); + nir_variable *vs_out_layer = + nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, + "v_layer"); + vs_out_layer->data.location = VARYING_SLOT_LAYER; + vs_out_layer->data.interpolation = INTERP_MODE_FLAT; + nir_ssa_def *inst_id = nir_load_system_value(_b, nir_intrinsic_load_instance_id, 0); + + nir_store_var(_b, vs_out_layer, inst_id, 0x1); + *out_vs = vs_b.shader; *out_fs = fs_b.shader; } @@ -447,7 +457,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, pipeline_h); } - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); + radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0); radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false); } @@ -477,6 +487,15 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs nir_copy_var(_b, vs_out_pos, vs_in_pos); + const struct glsl_type *layer_type = glsl_int_type(); + nir_variable *vs_out_layer = + nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, + "v_layer"); + vs_out_layer->data.location = VARYING_SLOT_LAYER; + 
vs_out_layer->data.interpolation = INTERP_MODE_FLAT; + nir_ssa_def *inst_id = nir_load_system_value(_b, nir_intrinsic_load_instance_id, 0); + nir_store_var(_b, vs_out_layer, inst_id, 0x1); + *out_vs = vs_b.shader; *out_fs = fs_b.shader; } @@ -717,7 +736,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect)) radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects); - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); + radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0); } @@ -948,13 +967,10 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer) radv_meta_save_graphics_reset_vport_scissor(_state, cmd_buffer); - if (cmd_state->framebuffer->layers > 1) - radv_finishme("clearing multi-layer framebuffer"); - VkClearRect clear_rect = { .rect = cmd_state->render_area, .baseArrayLayer = 0, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + .layerCount = cmd_state->framebuffer->layers, }; for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: split part of llvm compile into a separate function
Module: Mesa Branch: master Commit: 788610081198260d6974f86ed62a4b9aaf59b8c4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=788610081198260d6974f86ed62a4b9aaf59b8c4 Author: Dave AirlieDate: Tue Jan 17 08:41:03 2017 +1000 radv/ac: split part of llvm compile into a separate function This is needed to have common code for gs copy shader emission. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Edward O'Callaghan Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 33 ++--- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6d98fde..26b87e8 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4557,17 +4557,14 @@ out: return retval; } -void ac_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - struct nir_shader *nir, - const struct ac_nir_compiler_options *options, - bool dump_shader) +static void ac_compile_llvm_module(LLVMTargetMachineRef tm, + LLVMModuleRef llvm_module, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + gl_shader_stage stage, + bool dump_shader) { - - LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info, -options); if (dump_shader) LLVMDumpModule(llvm_module); @@ -4586,7 +4583,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, LLVMDisposeModule(llvm_module); LLVMContextDispose(ctx); - if (nir->stage == MESA_SHADER_FRAGMENT) { + if (stage == MESA_SHADER_FRAGMENT) { shader_info->num_input_vgprs = 0; if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) shader_info->num_input_vgprs += 2; @@ -4626,7 +4623,21 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, /* +3 for scratch wave offset and VCC */ config->num_sgprs = MAX2(config->num_sgprs, shader_info->num_input_sgprs + 3); +} + +void 
ac_compile_nir_shader(LLVMTargetMachineRef tm, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + struct nir_shader *nir, + const struct ac_nir_compiler_options *options, + bool dump_shader) +{ + + LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info, +options); + ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader); switch (nir->stage) { case MESA_SHADER_COMPUTE: for (int i = 0; i < 3; ++i) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): ac/debug: Decrease num_dw for type 2 NOP's.
Module: Mesa Branch: master Commit: 3b4bf8aa636768f4ad5fb636b8406e58d0d78f62 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b4bf8aa636768f4ad5fb636b8406e58d0d78f62 Author: Bas Nieuwenhuizen Date: Sun Jan 15 23:01:03 2017 +0100 ac/debug: Decrease num_dw for type 2 NOP's. Otherwise we read past the end of the buffer. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Nicolai Hähnle --- src/amd/common/ac_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index f91e448..989dfda 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -357,6 +357,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id, if (ib[0] == 0x80000000) { fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n"); ib++; + num_dw--; break; } /* fall through */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+
Module: Mesa Branch: master Commit: 57f18623fb94891c04f3a395cfd977ea3747ee61 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57f18623fb94891c04f3a395cfd977ea3747ee61 Author: Marek Olšák Date: Tue Jan 17 13:45:42 2017 +0100 radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+ Cc: 17.0 13.0 Reviewed-by: Edward O'Callaghan Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f404273..10f40a9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5440,10 +5440,13 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - /* The real barrier instruction isn’t needed, because an entire patch + /* SI only (thanks to a hw bug workaround): +* The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->type == PIPE_SHADER_TESS_CTRL) { + if (HAVE_LLVM >= 0x0309 && + ctx->screen->b.chip_class == SI && + ctx->type == PIPE_SHADER_TESS_CTRL) { emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (12.0): 37 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc2894d376e75de5255ed9670bcae14524cb0801 Author: Emil Velikov Date: Thu Jan 12 17:18:51 2017 +0000 automake: use shared llvm libs for make distcheck Cc: "12.0 13.0" Signed-off-by: Emil Velikov (cherry picked from commit 23dcce0c03db055c168696c9120637506b68b13d) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=febf22ff559604fca421a0555aeead5cbd8d4377 Author: Chad Versace Date: Fri Dec 16 12:05:45 2016 -0800 i965/mt: Disable HiZ when sharing depth buffer externally (v2) intel_miptree_make_shareable() discarded and disabled CCS. Fix it so that it discards and disables HiZ too. Fixes dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer on Skylake. v2: Actually do what the commit message says. Discard the HiZ buffer. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98329 Reviewed-by: Topi Pohjolainen Reviewed-by: Kenneth Graunke Reviewed-by: Anuj Phogat Cc: Nanley Chery (cherry picked from commit 42011be1e27f59d750b781c10766e19ec0ee6ff5) [Emil Velikov: patch is a backport by Chad of above commit] URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c7b53bba3738480e3ddaf84a0386f72834e5428 Author: Chad Versace Date: Fri Dec 9 16:18:11 2016 -0800 i965/mt: Disable aux surfaces after making miptree shareable The entire goal of intel_miptree_make_shareable() is to permanently disable the miptree's aux surfaces. So set intel_mipmap_tree:disable_aux_buffers after the function's done with discarding down the aux surfaces. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=98329 Reviewed-by: Topi Pohjolainen Reviewed-by: Kenneth Graunke Cc: Nanley Chery Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 1c8be049bea786c2c054a770025976beba5b8636) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c880deef41d3c4cf2dc56342c3f8cc6edb6728ca Author: Emil Velikov Date: Fri Dec 16 15:08:30 2016 +0000 get-typod-pick-list.sh: add new script Typos do happen as people nominate patches for stable. This script aims to catch most of those. Due to the subtle nature of things, one has to pay special attention to the output, similar to get-extra-pick-list.sh. At the moment only the following is handled: grep -i "CC:.*mesa-dev" Cc: 12.0 13.0 Signed-off-by: Emil Velikov (cherry picked from commit f0bdd13fdbc0bec1119b296d99820899183e26ab) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09973d9a991919d010251a6e0998fa105374b654 Author: Ilia Mirkin Date: Tue Jan 10 22:07:53 2017 -0500 nouveau: take extra push space into account for pushbuf_space calls Ever since a long time ago when I messed around with fences, I ensure that after a PUSH_SPACE call there is enough space to write a fence out into the pushbuf. However the PUSH_SPACE macro is not all-knowing, and so sometimes we have to invoke nouveau_pushbuf_space manually with the relocs/pushes args set. If we don't take the extra allocation from PUSH_SPACE into account, then we will end up accidentally flushing when the code was not expecting a flush. This can lead to various runtime and rendering failures. The amount of extra allocation isn't that important - it has to be at least 8 based on the current nouveau_winsys.h setting, but even more won't hurt. I just rounded up to powers of 2. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99354 Cc: "12.0 13.0" Signed-off-by: Ilia Mirkin Acked-by: Ben Skeggs (cherry picked from commit eb60a89bc3ac2b43faf52d06e05670bbbca7292d) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=36a54c27fd01e186c777acb4c12c61a7a32c2838 Author: Kenneth Graunke Date: Sun Jan 8 23:03:25 2017 -0800 spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass. vtn_ssa_value() can produce variable loads, and the cursor might be after a return statement, causing nir_builder assert failures about not inserting instructions after a jump. This fixes: dEQP-VK.spirv_assembly.instruction.graphics.barrier.in_if
Mesa (master): st/vdpau: remove the delayed rendering hack(v1.1)
Module: Mesa Branch: master Commit: 3a8f316e7b7f7dc5d913d117ec47e26587ce8177 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a8f316e7b7f7dc5d913d117ec47e26587ce8177 Author: Nayan Deshmukh Date: Wed Jan 11 22:45:15 2017 +0530 st/vdpau: remove the delayed rendering hack(v1.1) the hack was introduced to avoid an extra copying but now with dri3 we don't need it anymore v1.1: rebasing Signed-off-by: Nayan Deshmukh Acked-by: Christian König --- src/gallium/state_trackers/vdpau/bitmap.c| 2 - src/gallium/state_trackers/vdpau/device.c| 50 - src/gallium/state_trackers/vdpau/mixer.c | 93 +++- src/gallium/state_trackers/vdpau/output.c| 9 --- src/gallium/state_trackers/vdpau/presentation.c | 30 +++- src/gallium/state_trackers/vdpau/vdpau_private.h | 9 --- 6 files changed, 52 insertions(+), 141 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/bitmap.c b/src/gallium/state_trackers/vdpau/bitmap.c index fd67a98..d9ec60d 100644 --- a/src/gallium/state_trackers/vdpau/bitmap.c +++ b/src/gallium/state_trackers/vdpau/bitmap.c @@ -198,8 +198,6 @@ vlVdpBitmapSurfacePutBitsNative(VdpBitmapSurface surface, pipe_mutex_lock(vlsurface->device->mutex); - vlVdpResolveDelayedRendering(vlsurface->device, NULL, NULL); - dst_box = RectToPipeBox(destination_rect, vlsurface->sampler_view->texture); pipe->texture_subdata(pipe, vlsurface->sampler_view->texture, 0, PIPE_TRANSFER_WRITE, _box, *source_data, diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c index 8bae064..4f4ffdf 100644 --- a/src/gallium/state_trackers/vdpau/device.c +++ b/src/gallium/state_trackers/vdpau/device.c @@ -327,53 +327,3 @@ vlVdpDefaultSamplerViewTemplate(struct pipe_sampler_view *templ, struct pipe_res if (desc->swizzle[3] == PIPE_SWIZZLE_0) templ->swizzle_a = PIPE_SWIZZLE_1; } - -void -vlVdpResolveDelayedRendering(vlVdpDevice *dev, struct pipe_surface *surface, struct u_rect *dirty_area) -{ - struct vl_compositor_state *cstate; - vlVdpOutputSurface *vlsurface; - - 
assert(dev); - - cstate = dev->delayed_rendering.cstate; - if (!cstate) - return; - - vlsurface = vlGetDataHTAB(dev->delayed_rendering.surface); - if (!vlsurface) - return; - - if (!surface) { - surface = vlsurface->surface; - dirty_area = >dirty_area; - } - - vl_compositor_render(cstate, >compositor, surface, dirty_area, true); - - dev->delayed_rendering.surface = VDP_INVALID_HANDLE; - dev->delayed_rendering.cstate = NULL; - - /* test if we need to create a new sampler for the just filled texture */ - if (surface->texture != vlsurface->sampler_view->texture) { - struct pipe_resource *res = surface->texture; - struct pipe_sampler_view sv_templ; - - vlVdpDefaultSamplerViewTemplate(_templ, res); - pipe_sampler_view_reference(>sampler_view, NULL); - vlsurface->sampler_view = dev->context->create_sampler_view(dev->context, res, _templ); - } - - return; -} - -void -vlVdpSave4DelayedRendering(vlVdpDevice *dev, VdpOutputSurface surface, struct vl_compositor_state *cstate) -{ - assert(dev); - - vlVdpResolveDelayedRendering(dev, NULL, NULL); - - dev->delayed_rendering.surface = surface; - dev->delayed_rendering.cstate = cstate; -} diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index 1014174..37a6fcd 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -193,8 +193,6 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer) pipe_mutex_lock(vmixer->device->mutex); - vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL); - vlRemoveDataHTAB(mixer); vl_compositor_cleanup_state(>cstate); @@ -293,7 +291,6 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, } pipe_mutex_lock(vmixer->device->mutex); - vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL); vl_compositor_clear_layers(>cstate); @@ -403,64 +400,60 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, ++layers; } - if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && !vmixer->bicubic.filter) - 
vlVdpSave4DelayedRendering(vmixer->device, destination_surface, >cstate); - else { - vl_compositor_render(>cstate, compositor, surface, _area, true); - - if (vmixer->noise_reduction.filter) { - if (!vmixer->sharpness.filter && !vmixer->bicubic.filter) { -vl_median_filter_render(vmixer->noise_reduction.filter, -sampler_view, dst->surface); - } else { -res = pipe->screen->resource_create(pipe->screen, _tmpl); -struct pipe_sampler_view *sampler_view_temp = pipe->create_sampler_view(pipe, res, _templ); -struct
Mesa (master): vl/dri3: use external texture as back buffers(v4)
Module: Mesa Branch: master Commit: 0ef17d76bbbc9506d50138f1b4d79db8ef08ad6d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ef17d76bbbc9506d50138f1b4d79db8ef08ad6d Author: Nayan Deshmukh Date: Wed Jan 11 22:45:13 2017 +0530 vl/dri3: use external texture as back buffers(v4) dri3 allows us to send handle of a texture directly to X so this patch allows a state tracker to directly send its texture to X to be used as back buffer and avoids extra copying v2: use clip width/height to display a portion of the surface v3: remove redundant variables, fix wrapping, rename variables handle vaapi path v3.1: we need clip_width/height for every frame so we don't need to maintain it for each buffer instead use a global variable v4: In case of single gpu we can cache the buffers as applications use constant number of buffer and we can avoid calls to present extension for every frame Reviewed and Suggested-by: Leo Liu Acked-by: Christian König Tested-by: Andy Furniss Signed-off-by: Nayan Deshmukh --- configure.ac | 2 +- src/gallium/auxiliary/vl/vl_winsys.h | 5 ++ src/gallium/auxiliary/vl/vl_winsys_dri3.c | 126 ++ 3 files changed, 115 insertions(+), 18 deletions(-) diff --git a/configure.ac b/configure.ac index 459f3e8..3e2d79a 100644 --- a/configure.ac +++ b/configure.ac @@ -2081,7 +2081,7 @@ if test "x$enable_xvmc" = xyes -o \ "x$enable_va" = xyes; then if test x"$enable_dri3" = xyes; then PKG_CHECK_MODULES([VL], [xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED - x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED]) + xcb-xfixes x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED]) else PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED]) fi diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h index 26db9f2..e1f9b27 100644 --- a/src/gallium/auxiliary/vl/vl_winsys.h +++ b/src/gallium/auxiliary/vl/vl_winsys.h @@ -59,6 +59,11 @@ struct vl_screen void * (*get_private)(struct vl_screen *vscreen); + void + 
(*set_back_texture_from_output)(struct vl_screen *vscreen, + struct pipe_resource *buffer, + uint32_t width, uint32_t height); + struct pipe_screen *pscreen; struct pipe_loader_device *dev; }; diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c index 2929928..a810dea 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "loader.h" @@ -71,9 +72,12 @@ struct vl_dri3_screen xcb_special_event_t *special_event; struct pipe_context *pipe; + struct pipe_resource *output_texture; + uint32_t clip_width, clip_height; struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM]; int cur_back; + int next_back; struct u_rect dirty_areas[BACK_BUFFER_NUM]; @@ -105,7 +109,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn, xcb_free_pixmap(scrn->conn, buffer->pixmap); xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence); xshmfence_unmap_shm(buffer->shm_fence); - pipe_resource_reference(>texture, NULL); + if (!scrn->output_texture) + pipe_resource_reference(>texture, NULL); if (buffer->linear_texture) pipe_resource_reference(>linear_texture, NULL); FREE(buffer); @@ -236,29 +241,31 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn) templ.format = PIPE_FORMAT_B8G8R8X8_UNORM; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; - templ.width0 = scrn->width; - templ.height0 = scrn->height; + templ.width0 = (scrn->output_texture) ? + scrn->output_texture->width0 : scrn->width; + templ.height0 = (scrn->output_texture) ? + scrn->output_texture->height0 : scrn->height; templ.depth0 = 1; templ.array_size = 1; if (scrn->is_different_gpu) { - buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, -); + buffer->texture = (scrn->output_texture) ? 
scrn->output_texture : + scrn->base.pscreen->resource_create(scrn->base.pscreen, ); if (!buffer->texture) goto unmap_shm; templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_LINEAR; - buffer->linear_texture = scrn->base.pscreen->resource_create(scrn->base.pscreen, - ); + buffer->linear_texture = + scrn->base.pscreen->resource_create(scrn->base.pscreen, );
Mesa (master): st/vdpau: use dri3 to directly send the buffer to X(v2)
Module: Mesa Branch: master Commit: 15bfdea99c7b487d2c38d6dd7b88fb44810ef75a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15bfdea99c7b487d2c38d6dd7b88fb44810ef75a Author: Nayan Deshmukh Date: Wed Jan 11 22:45:14 2017 +0530 st/vdpau: use dri3 to directly send the buffer to X(v2) this avoids an extra copy which occurs in case of dri2 v1.1: fallback to dri2 if dri3 fails to initialize v2: add PIPE_BIND_SCANOUT to output buffers as they will be send to X server directly (Michel) Suggested-by: Christian König Tested-by: Andy Furniss Signed-off-by: Nayan Deshmukh --- src/gallium/state_trackers/vdpau/output.c | 2 +- src/gallium/state_trackers/vdpau/presentation.c | 58 ++--- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c index d67ead8..8ddf2c1 100644 --- a/src/gallium/state_trackers/vdpau/output.c +++ b/src/gallium/state_trackers/vdpau/output.c @@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device, res_tmpl.depth0 = 1; res_tmpl.array_size = 1; res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SHARED; + PIPE_BIND_SHARED | PIPE_BIND_SCANOUT; res_tmpl.usage = PIPE_USAGE_DEFAULT; pipe_mutex_lock(dev->mutex); diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c index f35d73a..b2c8aea 100644 --- a/src/gallium/state_trackers/vdpau/presentation.c +++ b/src/gallium/state_trackers/vdpau/presentation.c @@ -231,43 +231,47 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, vscreen = pq->device->vscreen; pipe_mutex_lock(pq->device->mutex); + if (vscreen->set_back_texture_from_output) + vscreen->set_back_texture_from_output(vscreen, surf->surface->texture, clip_width, clip_height); tex = vscreen->texture_from_drawable(vscreen, (void *)pq->drawable); if (!tex) { pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_INVALID_HANDLE; } - dirty_area = 
vscreen->get_dirty_area(vscreen); + if (!vscreen->set_back_texture_from_output) { + dirty_area = vscreen->get_dirty_area(vscreen); - memset(_templ, 0, sizeof(surf_templ)); - surf_templ.format = tex->format; - surf_draw = pipe->create_surface(pipe, tex, _templ); + memset(_templ, 0, sizeof(surf_templ)); + surf_templ.format = tex->format; + surf_draw = pipe->create_surface(pipe, tex, _templ); - dst_clip.x0 = 0; - dst_clip.y0 = 0; - dst_clip.x1 = clip_width ? clip_width : surf_draw->width; - dst_clip.y1 = clip_height ? clip_height : surf_draw->height; + dst_clip.x0 = 0; + dst_clip.y0 = 0; + dst_clip.x1 = clip_width ? clip_width : surf_draw->width; + dst_clip.y1 = clip_height ? clip_height : surf_draw->height; - if (pq->device->delayed_rendering.surface == surface && - dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) { + if (pq->device->delayed_rendering.surface == surface && + dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) { - // TODO: we correctly support the clipping here, but not the pq background color in the clipped area - cstate = pq->device->delayed_rendering.cstate; - vl_compositor_set_dst_clip(cstate, _clip); - vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area); + // TODO: we correctly support the clipping here, but not the pq background color in the clipped area + cstate = pq->device->delayed_rendering.cstate; + vl_compositor_set_dst_clip(cstate, _clip); + vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area); - } else { - vlVdpResolveDelayedRendering(pq->device, NULL, NULL); + } else { + vlVdpResolveDelayedRendering(pq->device, NULL, NULL); - src_rect.x0 = 0; - src_rect.y0 = 0; - src_rect.x1 = surf_draw->width; - src_rect.y1 = surf_draw->height; + src_rect.x0 = 0; + src_rect.y0 = 0; + src_rect.x1 = surf_draw->width; + src_rect.y1 = surf_draw->height; - vl_compositor_clear_layers(cstate); - vl_compositor_set_rgba_layer(cstate, compositor, 0, surf->sampler_view, _rect, NULL, NULL); - 
vl_compositor_set_dst_clip(cstate, _clip); - vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true); + vl_compositor_clear_layers(cstate); + vl_compositor_set_rgba_layer(cstate, compositor, 0, surf->sampler_view, _rect, NULL, NULL); + vl_compositor_set_dst_clip(cstate, _clip); + vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true); + } } vscreen->set_next_timestamp(vscreen, earliest_presentation_time); @@ -297,8 +301,10 @@