On Thu, Jul 6, 2017 at 5:22 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > This adds a NIR pass that decides which portions of UBOS we should > upload as push constants, rather than pull constants. > --- > src/intel/Makefile.sources | 1 + > src/intel/compiler/brw_compiler.h | 11 + > src/intel/compiler/brw_nir.h | 4 + > src/intel/compiler/brw_nir_analyze_ubo_ranges.c | 271 > ++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_gs.c | 2 + > src/mesa/drivers/dri/i965/brw_tcs.c | 2 + > src/mesa/drivers/dri/i965/brw_tes.c | 2 + > src/mesa/drivers/dri/i965/brw_vs.c | 2 + > src/mesa/drivers/dri/i965/brw_wm.c | 2 + > 9 files changed, 297 insertions(+) > create mode 100644 src/intel/compiler/brw_nir_analyze_ubo_ranges.c > > diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources > index b672e615c52..f0a8bf517a1 100644 > --- a/src/intel/Makefile.sources > +++ b/src/intel/Makefile.sources > @@ -73,6 +73,7 @@ COMPILER_FILES = \ > compiler/brw_nir.h \ > compiler/brw_nir.c \ > compiler/brw_nir_analyze_boolean_resolves.c \ > + compiler/brw_nir_analyze_ubo_ranges.c \ > compiler/brw_nir_attribute_workarounds.c \ > compiler/brw_nir_intrinsics.c \ > compiler/brw_nir_opt_peephole_ffma.c \ > diff --git a/src/intel/compiler/brw_compiler.h > b/src/intel/compiler/brw_compiler.h > index e4c22e31177..d8e7717e867 100644 > --- a/src/intel/compiler/brw_compiler.h > +++ b/src/intel/compiler/brw_compiler.h > @@ -468,6 +468,15 @@ struct brw_image_param { > */ > #define BRW_SHADER_TIME_STRIDE 64 > > +struct brw_ubo_range > +{ > + // XXX: jason says that 255 won't be enough for vulkan - we may have > + // large amounts of UBOs in the future. use uint16_t. > + uint8_t block; > + uint8_t start; > + uint8_t length; > +}; > + > struct brw_stage_prog_data { > struct { > /** size of our binding table. 
*/ > @@ -488,6 +497,8 @@ struct brw_stage_prog_data { > /** @} */ > } binding_table; > > + struct brw_ubo_range ubo_ranges[4]; > + > GLuint nr_params; /**< number of float params/constants */ > GLuint nr_pull_params; > unsigned nr_image_params; > diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h > index 5d866b86ac8..560027c3662 100644 > --- a/src/intel/compiler/brw_nir.h > +++ b/src/intel/compiler/brw_nir.h > @@ -142,6 +142,10 @@ void brw_nir_setup_glsl_uniforms(nir_shader *shader, > void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, > struct brw_stage_prog_data *stage_prog_data); > > +void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, > + nir_shader *nir, > + struct brw_ubo_range out_ranges[4]); > + > bool brw_nir_opt_peephole_ffma(nir_shader *shader); > > #define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0 > diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c > b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c > new file mode 100644 > index 00000000000..3535e67758c > --- /dev/null > +++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c > @@ -0,0 +1,271 @@ > +/* > + * Copyright © 2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "brw_nir.h" > +#include "compiler/nir/nir.h" > +#include "util/u_dynarray.h" > + > +/** > + * \file brw_nir_analyze_ubo_ranges.c > + * > + * This pass decides which portions of UBOs to upload as push constants, > + * so shaders can access them as part of the thread payload, rather than > + * having to issue expensive memory reads to pull the data. > + * > + * The 3DSTATE_CONSTANT_* mechanism can push data from up to 4 different > + * buffers, in GRF (256-bit/32-byte) units. > + * > + * To do this, we examine NIR load_ubo intrinsics, recording the number of > + * loads at each offset. We track offsets at a 32-byte granularity, so even > + * fields with a bit of padding between them tend to fall into contiguous > + * ranges. We build a list of these ranges, tracking their "cost" (number > + * of registers required) and "benefit" (number of pull loads eliminated > + * by pushing the range). We then sort the list to obtain the four best > + * ranges (most benefit for the least cost). > + */ > + > +struct ubo_range_entry > +{ > + struct brw_ubo_range range; > + int benefit; > +}; > + > +static int > +score(const struct ubo_range_entry *entry) > +{ > + return 2 * entry->benefit - entry->range.length; > +} > + > +/** > + * Compares score for two UBO range entries. > + * > + * For a descending qsort(). 
> + */ > +static int > +cmp_ubo_range_entry(const void *va, const void *vb) > +{ > + const struct ubo_range_entry *a = va; > + const struct ubo_range_entry *b = vb; > + > + /* Rank based on scores */ > + int delta = score(b) - score(a); > + > + /* Then use the UBO block index as a tie-breaker */ > + if (delta == 0) > + delta = b->range.block - a->range.block; > + > + /* Finally use the UBO offset as a second tie-breaker */ > + if (delta == 0) > + delta = b->range.block - a->range.block; > + > + return delta; > +} > + > +struct ubo_block_info > +{ > + uint64_t offsets; > + uint8_t uses[64]; > +}; > + > +struct ubo_analysis_state > +{ > + struct hash_table *blocks; > + bool uses_regular_uniforms; > +}; > + > +static struct ubo_block_info * > +get_block_info(struct ubo_analysis_state *state, int block) > +{ > + uint32_t hash = block + 1; > + void *key = (void *) (uintptr_t) hash; > + > + struct hash_entry *entry = > + _mesa_hash_table_search_pre_hashed(state->blocks, hash, key); > + > + if (entry) > + return (struct ubo_block_info *) entry->data; > + > + struct ubo_block_info *info = > + rzalloc(state->blocks, struct ubo_block_info); > + _mesa_hash_table_insert_pre_hashed(state->blocks, hash, key, info); > + > + return info; > +} > + > +static void > +analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block) > +{ > + nir_foreach_instr(instr, block) { > + if (instr->type != nir_instr_type_intrinsic) > + continue; > + > + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > + if (intrin->intrinsic == nir_intrinsic_load_uniform) > + state->uses_regular_uniforms = true; > + > + if (intrin->intrinsic != nir_intrinsic_load_ubo) > + continue; > + > + nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]); > + nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]); > + > + if (block_const && offset_const) { > + const int block = block_const->u32[0]; > + const int offset = offset_const->u32[0] / 32; > + > + /* Won't fit in 
our bitfield */ > + if (offset >= 64) > + continue; > + > + /* TODO: should we count uses in loops as higher benefit? */ > + > + struct ubo_block_info *info = get_block_info(state, block); > + info->offsets |= 1ull << offset; > + info->uses[offset]++; > + } > + } > +} > + > +static void > +print_ubo_entry(FILE *file, > + const struct ubo_range_entry *entry, > + struct ubo_analysis_state *state) > +{ > + struct ubo_block_info *info = get_block_info(state, entry->range.block); > + > + fprintf(file, > + "block %2d, start %2d, length %2d, bits = %zx, " > + "benefit %2d, cost %2d, score = %2d\n", > + entry->range.block, entry->range.start, entry->range.length, > + info->offsets, entry->benefit, entry->range.length, score(entry)); > +} > + > +void > +brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, > + nir_shader *nir, > + struct brw_ubo_range out_ranges[4]) > +{ > + const struct gen_device_info *devinfo = compiler->devinfo; > + > + if (devinfo->gen <= 7 && !devinfo->is_haswell) { > + memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range)); > + return; > + } > + > + void *mem_ctx = ralloc_context(NULL); > + > + struct ubo_analysis_state state = { > + .uses_regular_uniforms = false, > + .blocks = > + _mesa_hash_table_create(mem_ctx, NULL, _mesa_key_pointer_equal), > + }; > + > + /* Walk the IR, recording how many times each UBO block/offset is used. */ > + nir_foreach_function(function, nir) { > + if (function->impl) { > + nir_foreach_block(block, function->impl) { > + analyze_ubos_block(&state, block); > + } > + } > + } > + > + /* Find ranges. */ > + struct util_dynarray ranges; > + util_dynarray_init(&ranges, mem_ctx); > + > + struct hash_entry *entry; > + hash_table_foreach(state.blocks, entry) { > + const int b = entry->hash - 1; > + const struct ubo_block_info *info = entry->data; > + uint64_t offsets = info->offsets; > + > + while (offsets != 0) { > + int first_bit = ffsll(offsets) - 1;
Okay, get the zero-indexed first set bit. > + int first_hole = ffsll(~offsets & ~((1ull << first_bit) - 1)) - 1; ~((1ull << first_bit) - 1) gives the mask of bits greater than or equal to first_bit. Is offsets a bitmask indicating presence of data within a UBO block? So ~offsets is the bitmask of the padding, and AND'ing gives us a bitmask of ??? I'm lost. Do you just want to use __builtin_clz (or util_logbase2)? > + if (first_hole == -1) { > + first_hole = 64; > + offsets = 0; Okay, so anyway, first_hole == -1 indicates the whole block is full, in which case we want to consider the whole block starting at offset=0. Wait, no. offsets is still a mask... Need some help getting through this patch. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev