On Mon, May 16, 2016 at 9:04 PM, Francisco Jerez <curroje...@riseup.net> wrote:
> Jason Ekstrand <ja...@jlekstrand.net> writes: > > > On May 16, 2016 6:44 PM, "Francisco Jerez" <curroje...@riseup.net> > wrote: > >> > >> Jason Ekstrand <ja...@jlekstrand.net> writes: > >> > >> > This allows us to disable spilling for blorp shaders since blorp state > >> > setup doesn't handle spilling. Without this, blorp faisl hard if you > > run > >> > with INTEL_DEBUG=spill. > >> > > >> > Cc: Curro <curroje...@riseup.net> > >> > >> Reviewed-by: Francisco Jerez <curroje...@riseup.net> > > > > Did you test it? One of is probably should. :-p > > > I've just done a full piglit run on SKL with INTEL_DEBUG=spill_fs and > this patch applied on top of my spilling fixes. Didn't notice any > regressions so feel free to add my: > > Tested-by: Francisco Jerez <curroje...@riseup.net> > Thanks for both review and testing! Pushed. --Jason > > >> > --- > >> > src/intel/vulkan/anv_pipeline.c | 3 ++- > >> > src/mesa/drivers/dri/i965/brw_blorp.c | 2 +- > >> > src/mesa/drivers/dri/i965/brw_compiler.h | 1 + > >> > src/mesa/drivers/dri/i965/brw_fs.cpp | 29 > > +++++++++++++---------- > >> > src/mesa/drivers/dri/i965/brw_fs.h | 6 ++--- > >> > src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 ++-- > >> > src/mesa/drivers/dri/i965/brw_wm.c | 3 ++- > >> > 7 files changed, 28 insertions(+), 20 deletions(-) > >> > > >> > diff --git a/src/intel/vulkan/anv_pipeline.c > > b/src/intel/vulkan/anv_pipeline.c > >> > index a8e31b1..7d265d8 100644 > >> > --- a/src/intel/vulkan/anv_pipeline.c > >> > +++ b/src/intel/vulkan/anv_pipeline.c > >> > @@ -675,7 +675,8 @@ anv_pipeline_compile_fs(struct anv_pipeline > > *pipeline, > >> > unsigned code_size; > >> > const unsigned *shader_code = > >> > brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, > nir, > >> > - NULL, -1, -1, pipeline->use_repclear, > > &code_size, NULL); > >> > + NULL, -1, -1, true, pipeline->use_repclear, > >> > + &code_size, NULL); > >> > if (shader_code == NULL) { > >> > ralloc_free(mem_ctx); > >> > return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); > >> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c > > b/src/mesa/drivers/dri/i965/brw_blorp.c > >> > index 09a0fd1..9590968 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c > >> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c > >> > @@ -223,7 +223,7 @@ brw_blorp_compile_nir_shader(struct brw_context > > *brw, struct nir_shader *nir, > >> > > >> > const unsigned *program = > >> > brw_compile_fs(compiler, brw, mem_ctx, wm_key, &wm_prog_data, > > nir, > >> > - NULL, -1, -1, use_repclear, program_size, NULL); > >> > + NULL, -1, -1, false, use_repclear, program_size, > > NULL); > >> > > >> > /* Copy the relavent bits of wm_prog_data over into the blorp prog > > data */ > >> > prog_data->dispatch_8 = wm_prog_data.dispatch_8; > >> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > > b/src/mesa/drivers/dri/i965/brw_compiler.h > >> > index a2148ae..15f32be 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > >> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > >> > @@ -789,6 +789,7 @@ brw_compile_fs(const struct brw_compiler > *compiler, > > void *log_data, > >> > struct gl_program *prog, > >> > int shader_time_index8, > >> > int shader_time_index16, > >> > + bool allow_spilling, > >> > bool use_rep_send, > >> > unsigned *final_assembly_size, > >> > char **error_str); > >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > > b/src/mesa/drivers/dri/i965/brw_fs.cpp > >> > index 2de5533..55d10fc 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > >> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > >> > @@ -5462,7 +5462,7 @@ fs_visitor::fixup_3src_null_dest() > >> > } > >> > > >> > void > >> > -fs_visitor::allocate_registers() > >> > +fs_visitor::allocate_registers(bool allow_spilling) > >> > { > >> > bool allocated_without_spills; > >> > > >> > @@ -5472,6 +5472,8 @@ fs_visitor::allocate_registers() > >> > SCHEDULE_PRE_LIFO, > >> > }; > >> > > >> > + bool spill_all = allow_spilling && (INTEL_DEBUG & DEBUG_SPILL_FS); > >> > + > >> > /* Try each scheduling heuristic to see if it can successfully > > register > >> > * allocate without spilling. They should be ordered by > decreasing > >> > * performance but increasing likelihood of allocating. > >> > @@ -5483,7 +5485,7 @@ fs_visitor::allocate_registers() > >> > assign_regs_trivial(); > >> > allocated_without_spills = true; > >> > } else { > >> > - allocated_without_spills = assign_regs(false); > >> > + allocated_without_spills = assign_regs(false, spill_all); > >> > } > >> > if (allocated_without_spills) > >> > break; > >> > @@ -5508,12 +5510,14 @@ fs_visitor::allocate_registers() > >> > /* Since we're out of heuristics, just go spill registers until > > we > >> > * get an allocation. > >> > */ > >> > - while (!assign_regs(true)) { > >> > + while (!assign_regs(true, spill_all)) { > >> > if (failed) > >> > break; > >> > } > >> > } > >> > > >> > + assert(last_scratch == 0 || allow_spilling); > >> > + > >> > /* This must come after all optimization and register allocation, > > since > >> > * it inserts dead code that happens to have side effects, and it > > does > >> > * so based on the actual physical registers in use. > >> > @@ -5559,7 +5563,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) > >> > assign_vs_urb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(true); > >> > > >> > return !failed; > >> > } > >> > @@ -5641,7 +5645,7 @@ fs_visitor::run_tcs_single_patch() > >> > assign_tcs_single_patch_urb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(true); > >> > > >> > return !failed; > >> > } > >> > @@ -5675,7 +5679,7 @@ fs_visitor::run_tes() > >> > assign_tes_urb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(true); > >> > > >> > return !failed; > >> > } > >> > @@ -5724,13 +5728,13 @@ fs_visitor::run_gs() > >> > assign_gs_urb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(true); > >> > > >> > return !failed; > >> > } > >> > > >> > bool > >> > -fs_visitor::run_fs(bool do_rep_send) > >> > +fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) > >> > { > >> > brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) > > this->prog_data; > >> > brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key; > >> > @@ -5794,7 +5798,7 @@ fs_visitor::run_fs(bool do_rep_send) > >> > assign_urb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(allow_spilling); > >> > > >> > if (failed) > >> > return false; > >> > @@ -5837,7 +5841,7 @@ fs_visitor::run_cs() > >> > assign_curb_setup(); > >> > > >> > fixup_3src_null_dest(); > >> > - allocate_registers(); > >> > + allocate_registers(true); > >> > > >> > if (failed) > >> > return false; > >> > @@ -5962,6 +5966,7 @@ brw_compile_fs(const struct brw_compiler > > *compiler, void *log_data, > >> > const nir_shader *src_shader, > >> > struct gl_program *prog, > >> > int shader_time_index8, int shader_time_index16, > >> > + bool allow_spilling, > >> > bool use_rep_send, > >> > unsigned *final_assembly_size, > >> > char **error_str) > >> > @@ -6005,7 +6010,7 @@ brw_compile_fs(const struct brw_compiler > > *compiler, void *log_data, > >> > fs_visitor v8(compiler, log_data, mem_ctx, key, > >> > &prog_data->base, prog, shader, 8, > >> > shader_time_index8); > >> > - if (!v8.run_fs(false /* do_rep_send */)) { > >> > + if (!v8.run_fs(allow_spilling, false /* do_rep_send */)) { > >> > if (error_str) > >> > *error_str = ralloc_strdup(mem_ctx, v8.fail_msg); > >> > > >> > @@ -6023,7 +6028,7 @@ brw_compile_fs(const struct brw_compiler > > *compiler, void *log_data, > >> > &prog_data->base, prog, shader, 16, > >> > shader_time_index16); > >> > v16.import_uniforms(&v8); > >> > - if (!v16.run_fs(use_rep_send)) { > >> > + if (!v16.run_fs(allow_spilling, use_rep_send)) { > >> > compiler->shader_perf_log(log_data, > >> > "SIMD16 shader failed to compile: > > %s", > >> > v16.fail_msg); > >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > > b/src/mesa/drivers/dri/i965/brw_fs.h > >> > index d4eb8fb..b6fe564 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_fs.h > >> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > >> > @@ -105,14 +105,14 @@ public: > >> > uint32_t const_offset); > >> > void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); > >> > > >> > - bool run_fs(bool do_rep_send); > >> > + bool run_fs(bool allow_spilling, bool do_rep_send); > >> > bool run_vs(gl_clip_plane *clip_planes); > >> > bool run_tcs_single_patch(); > >> > bool run_tes(); > >> > bool run_gs(); > >> > bool run_cs(); > >> > void optimize(); > >> > - void allocate_registers(); > >> > + void allocate_registers(bool allow_spilling); > >> > void setup_fs_payload_gen4(); > >> > void setup_fs_payload_gen6(); > >> > void setup_vs_payload(); > >> > @@ -127,7 +127,7 @@ public: > >> > void assign_tcs_single_patch_urb_setup(); > >> > void assign_tes_urb_setup(); > >> > void assign_gs_urb_setup(); > >> > - bool assign_regs(bool allow_spilling); > >> > + bool assign_regs(bool allow_spilling, bool spill_all); > >> > void assign_regs_trivial(); > >> > void calculate_payload_ranges(int payload_node_count, > >> > int *payload_last_use_ip); > >> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > >> > index 2347cd5..e65f73b 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > >> > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > >> > @@ -542,7 +542,7 @@ setup_mrf_hack_interference(fs_visitor *v, struct > > ra_graph *g, > >> > } > >> > > >> > bool > >> > -fs_visitor::assign_regs(bool allow_spilling) > >> > +fs_visitor::assign_regs(bool allow_spilling, bool spill_all) > >> > { > >> > /* Most of this allocation was written for a reg_width of 1 > >> > * (dispatch_width == 8). In extending to SIMD16, the code was > >> > @@ -668,7 +668,7 @@ fs_visitor::assign_regs(bool allow_spilling) > >> > } > >> > > >> > /* Debug of register spilling: Go spill everything. */ > >> > - if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) { > >> > + if (unlikely(spill_all)) { > >> > int reg = choose_spill_reg(g); > >> > > >> > if (reg != -1) { > >> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c > > b/src/mesa/drivers/dri/i965/brw_wm.c > >> > index 395b0b8..8af1ff7 100644 > >> > --- a/src/mesa/drivers/dri/i965/brw_wm.c > >> > +++ b/src/mesa/drivers/dri/i965/brw_wm.c > >> > @@ -137,7 +137,8 @@ brw_codegen_wm_prog(struct brw_context *brw, > >> > program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, > >> > key, &prog_data, fp->program.Base.nir, > >> > &fp->program.Base, st_index8, st_index16, > >> > - brw->use_rep_send, &program_size, > > &error_str); > >> > + true, brw->use_rep_send, > >> > + &program_size, &error_str); > >> > if (program == NULL) { > >> > if (prog) { > >> > prog->LinkStatus = false; > >> > -- > >> > 2.5.0.400.gff86faf >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev