On Sat, Jul 28, 2018 at 10:44:39PM -0700, Jason Ekstrand wrote:
> This pass looks for variables with vector or array-of-vector types and
> narrows the type to only the components used.
> ---
>  src/compiler/nir/nir.h            |   1 +
>  src/compiler/nir/nir_split_vars.c | 694 ++++++++++++++++++++++++++++++
>  2 files changed, 695 insertions(+)
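To make the effect concrete, a made-up example of mine (not from the patch):
given a shader-local

    vec4 tmp[8];

where only the .xy components of tmp[0] through tmp[3] are ever read or
written, the pass would narrow the variable to

    vec2 tmp[4];

and fix up the accesses to match.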
This patch and the one that enables the pass are

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.olive...@intel.com>

I have some suggestions below, pick the ones you like.

(...)

> +static void
> +mark_deref_used(nir_deref_instr *deref,
> +                nir_component_mask_t comps_read,
> +                nir_component_mask_t comps_written,
> +                nir_deref_instr *copy_deref,
> +                struct hash_table *var_usage_map,
> +                nir_variable_mode modes,
> +                void *mem_ctx)
> +{
> +   if (!(deref->mode & modes))
> +      return;
> +
> +   nir_variable *var = nir_deref_instr_get_variable(deref);
> +
> +   struct vec_var_usage *usage =
> +      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
> +   if (!usage)
> +      return;
> +
> +   const unsigned num_var_components =
> +      glsl_get_components(glsl_without_array_or_matrix(var->type));

Consider setting a usage->comps (or similar) to "(1u << num_var_components) - 1"
at vec_var_usage creation time.  That's some cheap savings here and in a few
other spots, and would also mean fewer noise lines here and there.  (Sketch
further down.)

> +
> +   usage->comps_read |= comps_read & ((1u << num_var_components) - 1);
> +   usage->comps_written |= comps_written & ((1u << num_var_components) - 1);
> +
> +   struct vec_var_usage *copy_usage = NULL;
> +   if (copy_deref) {
> +      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
> +                                       true, mem_ctx);
> +      if (copy_usage) {
> +         if (usage->vars_copied == NULL) {
> +            usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
> +                                                  _mesa_key_pointer_equal);
> +         }
> +         _mesa_set_add(usage->vars_copied, copy_usage);
> +      } else {
> +         usage->has_external_copy = true;
> +      }
> +   }
> +
> +   nir_deref_path path;
> +   nir_deref_path_init(&path, deref, mem_ctx);
> +
> +   nir_deref_path copy_path;
> +   if (copy_usage)
> +      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
> +
> +   unsigned copy_i = 0;
> +   for (unsigned i = 0; i < usage->num_levels; i++) {
> +      struct array_level_usage *level = &usage->levels[i];
> +      nir_deref_instr *deref = path.path[i + 1];
> +      assert(deref->deref_type == nir_deref_type_array ||
> +             deref->deref_type == nir_deref_type_array_wildcard);
> +
> +      unsigned max_used;
> +      if (deref->deref_type == nir_deref_type_array) {
> +         nir_const_value *const_index =
> +            nir_src_as_const_value(deref->arr.index);
> +         max_used = const_index ? const_index->u32[0] : UINT_MAX;
> +      } else {
> +         /* For wildcards, we read or wrote the whole thing. */
> +         assert(deref->deref_type == nir_deref_type_array_wildcard);
> +         max_used = level->array_len - 1;
> +
> +         if (copy_usage) {
> +            /* Match each wildcard level with the level on copy_usage */
> +            for (; copy_path.path[copy_i + 1]; copy_i++) {
> +               if (copy_path.path[copy_i + 1]->deref_type ==
> +                   nir_deref_type_array_wildcard)
> +                  break;
> +            }
> +            struct array_level_usage *copy_level =
> +               &copy_usage->levels[copy_i++];
> +
> +            if (level->levels_copied == NULL) {
> +               level->levels_copied =
> +                  _mesa_set_create(mem_ctx, _mesa_hash_pointer,
> +                                   _mesa_key_pointer_equal);
> +            }
> +            _mesa_set_add(level->levels_copied, copy_level);

Since once level->has_external_copy is set we don't really use levels_copied,
maybe wrap the bootstrap/set block in "if (!level->has_external_copy)"?
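Something like this, i.e. the same code as in the quoted hunk, just guarded:

   if (!level->has_external_copy) {
      if (level->levels_copied == NULL) {
         level->levels_copied =
            _mesa_set_create(mem_ctx, _mesa_hash_pointer,
                             _mesa_key_pointer_equal);
      }
      _mesa_set_add(level->levels_copied, copy_level);
   }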
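And here is the sketch for the usage->comps idea from the top of the
function.  The field name ("all_comps") and the creation-time snippet are
mine, since that part of the pass isn't in the quoted hunks:

   /* When the vec_var_usage is first created, remember the full mask: */
   usage->all_comps =
      (1u << glsl_get_components(glsl_without_array_or_matrix(var->type))) - 1;

   /* ...so the updates in mark_deref_used() shrink to: */
   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   /* ...and e.g. shrink_vec_var_list() can use usage->all_comps instead of
    * recomputing (1u << var_num_components) - 1.
    */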
> +         } else {
> +            /* We have a wildcard and we don't know where this copy came from,
> +             * flag it and we'll know to not shorten this array.
> +             */

Maybe instead of "we don't know" say that the copy comes from a variable of
another mode.

> +            level->has_external_copy = true;
> +         }
> +      }
> +
> +      if (comps_written)
> +         level->max_written = MAX2(level->max_written, max_used);
> +      if (comps_read)
> +         level->max_read = MAX2(level->max_read, max_used);
> +   }
> +}

(...)

> +static void
> +find_used_components_impl(nir_function_impl *impl,
> +                          struct hash_table *var_usage_map,
> +                          nir_variable_mode modes,
> +                          void *mem_ctx)
> +{
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {

Unless I'm missing something, we don't need the "safe" variant here.

(...)

> +static bool
> +shrink_vec_var_list(struct exec_list *vars,
> +                    struct hash_table *var_usage_map)
> +{
> +   /* Initialize the components kept field of each variable.  This is the
> +    * AND of the components written and components read.  If a component is
> +    * written but never read, it's dead.  If it is read but never written,
> +    * then all values read are undefined garbage and we may as well not read
> +    * them.
> +    *
> +    * The same logic applies to the array length.  We make the array length
> +    * the minimum required length between read and write and plan to
> +    * discard any OOB access.  The one exception here is indirect writes
> +    * because we don't know where they will land and we can't shrink an array
> +    * with indirect writes because previously in-bounds writes may become
> +    * out-of-bounds and have undefined behavior.
> +    *
> +    * Also, if we have a copy to/from something we can't shrink, we need
> +    * to leave components and array_len of any wildcards alone.
> +    */
> +   nir_foreach_variable(var, vars) {
> +      struct vec_var_usage *usage =
> +         get_vec_var_usage(var, var_usage_map, false, NULL);
> +      if (!usage)
> +         continue;

After reading, I thought that here and in the fixed-point loop it would be
better to iterate var_usage_map directly.  Due to the way we reuse
var_usage_map, we would have to skip entries based on modes, which kind of
spoils things a bit, but it's maybe still a win.  Or use different sets and
join them for the final step...  Probably won't make much difference.
(Rough sketch at the end of this hunk.)

> +
> +      const unsigned var_num_components =
> +         glsl_get_components(glsl_without_array_or_matrix(var->type));
> +
> +      assert(usage->comps_kept == 0);
> +      if (usage->has_external_copy)
> +         usage->comps_kept = (1u << var_num_components) - 1;
> +      else
> +         usage->comps_kept = usage->comps_read & usage->comps_written;
> +
> +      for (unsigned i = 0; i < usage->num_levels; i++) {
> +         struct array_level_usage *level = &usage->levels[i];
> +         assert(level->array_len > 0);
> +
> +         if (level->max_written == UINT_MAX || level->has_external_copy)
> +            continue; /* Can't shrink */
> +
> +         unsigned max_used = MIN2(level->max_read, level->max_written);
> +         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
> +      }
> +   }
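Rough sketch of what I meant by iterating var_usage_map directly.  This
assumes "modes" would get plumbed into shrink_vec_var_list() and that the map
is keyed on the nir_variable:

   struct hash_entry *map_entry;
   hash_table_foreach(var_usage_map, map_entry) {
      nir_variable *var = (nir_variable *)map_entry->key;
      struct vec_var_usage *usage = map_entry->data;

      /* The map is reused across modes, so skip entries we don't own. */
      if (!(var->data.mode & modes))
         continue;

      /* ...same body as the nir_foreach_variable loop above... */
   }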
> +
> +   /* In order for variable copies to work, we have to have the same data
> +    * type on the source and the destination.  In order to satisfy this, we
> +    * run a little fixed-point algorithm to transitively ensure that we get
> +    * enough components and array elements for this to hold for all copies.
> +    */
> +   bool fp_progress;
> +   do {
> +      fp_progress = false;
> +      nir_foreach_variable(var, vars) {
> +         struct vec_var_usage *var_usage =
> +            get_vec_var_usage(var, var_usage_map, false, NULL);
> +         if (!var_usage || !var_usage->vars_copied)
> +            continue;
> +
> +         struct set_entry *copy_entry;
> +         set_foreach(var_usage->vars_copied, copy_entry) {
> +            const struct vec_var_usage *copy_usage = copy_entry->key;
> +            if (copy_usage->comps_kept & ~var_usage->comps_kept) {
> +               var_usage->comps_kept |= copy_usage->comps_kept;

Could we set both sides here, i.e. also do
"copy_usage->comps_kept = var_usage->comps_kept"?  The reasoning is that it
might save an iteration: e.g. with copies A <-> C and B <-> C, but iteration
happening in the order A, B, then C, information from A will take two
iterations to propagate to B.  (Sketch at the end of this hunk.)

> +               fp_progress = true;
> +            }
> +         }
> +
> +         for (unsigned i = 0; i < var_usage->num_levels; i++) {
> +            struct array_level_usage *var_level = &var_usage->levels[i];
> +            if (!var_level->levels_copied)
> +               continue;
> +
> +            set_foreach(var_level->levels_copied, copy_entry) {
> +               const struct array_level_usage *copy_level = copy_entry->key;
> +               if (var_level->array_len < copy_level->array_len) {
> +                  var_level->array_len = copy_level->array_len;
> +                  fp_progress = true;
> +               }
> +            }
> +         }
> +      }
> +   } while (fp_progress);
> +
> +   bool vars_shrunk = false;
> +   nir_foreach_variable_safe(var, vars) {
> +      struct vec_var_usage *usage =
> +         get_vec_var_usage(var, var_usage_map, false, NULL);
> +      if (!usage)
> +         continue;
> +
> +      bool shrunk = false;
> +      const struct glsl_type *vec_type = var->type;
> +      for (unsigned i = 0; i < usage->num_levels; i++) {
> +         /* If we've reduced the array to zero elements at some level, just
> +          * set comps_kept to 0 and delete the variable.
> +          */
> +         if (usage->levels[i].array_len == 0)
> +            usage->comps_kept = 0;

Add a break here.  The rest of the loop execution would just try to find a
shrink situation, but regardless of the result it will be ignored, since we
bail when comps_kept == 0.  (Also sketched at the end of this hunk.)

> +
> +         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
> +         if (usage->levels[i].array_len < glsl_get_length(vec_type))
> +            shrunk = true;
> +         vec_type = glsl_get_array_element(vec_type);
> +      }
> +      assert(glsl_type_is_vector_or_scalar(vec_type));
> +
> +      assert(usage->comps_kept < (1u << glsl_get_components(vec_type)));
> +      if (usage->comps_kept != (1u << glsl_get_components(vec_type)) - 1)
> +         shrunk = true;
> +
> +      if (usage->comps_kept == 0) {
> +         /* This variable is dead, remove it */
> +         vars_shrunk = true;
> +         exec_node_remove(&var->node);
> +         continue;
> +      }
> +
> +      if (!shrunk) {
> +         /* This variable doesn't need to be shrunk.  Remove it from the
> +          * hash table so later passes will ignore it.

s/later passes/later steps/, assuming you are referring to the access-fixing
step.

> +          */
> +         _mesa_hash_table_remove_key(var_usage_map, var);
> +         continue;
> +      }
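For setting both sides in the fixed-point loop, I was thinking of something
like this (note it needs to cast away the const from the set key):

   set_foreach(var_usage->vars_copied, copy_entry) {
      struct vec_var_usage *copy_usage =
         (struct vec_var_usage *)copy_entry->key;
      if (copy_usage->comps_kept != var_usage->comps_kept) {
         /* Union the masks in both directions so bits learned from a
          * variable visited earlier reach the other side in the same
          * iteration.
          */
         nir_component_mask_t both_comps =
            var_usage->comps_kept | copy_usage->comps_kept;
         var_usage->comps_kept = both_comps;
         copy_usage->comps_kept = both_comps;
         fp_progress = true;
      }
   }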
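And the early-out when an array level shrinks to zero elements:

   if (usage->levels[i].array_len == 0) {
      usage->comps_kept = 0;
      break;
   }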
(...)

> +static void
> +shrink_vec_var_access_impl(nir_function_impl *impl,
> +                           struct hash_table *var_usage_map,
> +                           nir_variable_mode modes)
> +{
> +   nir_builder b;
> +   nir_builder_init(&b, impl);
> +
> +   nir_foreach_block(block, impl) {
> +      nir_foreach_instr_safe(instr, block) {
> +         switch (instr->type) {
> +         case nir_instr_type_deref: {
> +            nir_deref_instr *deref = nir_instr_as_deref(instr);
> +            if (!(deref->mode & modes))
> +               break;
> +
> +            /* Clean up any dead derefs we find lying around.  They may refer
> +             * to variables we've deleted.
> +             */
> +            if (nir_deref_instr_remove_if_unused(deref))
> +               break;
> +
> +            /* We don't need to check if this is one of the derefs we're
> +             * shrinking because this is a no-op if it isn't.  The worst that
> +             * could happen is that we accidentally fix an invalid deref.
> +             */

Consider prefixing this comment with something like: "Update the deref's type
to the new variable type."

(...)

Thanks,
Caio

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev