Module: Mesa
Branch: main
Commit: a406fff78a57eab35c513042670500f4fc784042
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a406fff78a57eab35c513042670500f4fc784042
Author: Qiang Yu <[email protected]>
Date:   Thu Jul 22 16:16:58 2021 +0800

    nir/inline_uniforms: support vector uniform

    Collect dependencies per vector component, and lower a vector uniform
    load to scalar loads if any of its components needs to be inlined.

    Reviewed-by: Marek Olšák <[email protected]>
    Signed-off-by: Qiang Yu <[email protected]>
    Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11950>

---
 src/compiler/nir/nir_inline_uniforms.c | 118 ++++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 24 deletions(-)

diff --git a/src/compiler/nir/nir_inline_uniforms.c b/src/compiler/nir/nir_inline_uniforms.c
index 2099076fa3f..1a732e20a23 100644
--- a/src/compiler/nir/nir_inline_uniforms.c
+++ b/src/compiler/nir/nir_inline_uniforms.c
@@ -43,23 +43,49 @@
 #define MAX_OFFSET (UINT16_MAX * 4)
 
 static bool
-src_only_uses_uniforms(const nir_src *src, uint32_t *uni_offsets,
-                       unsigned *num_offsets)
+src_only_uses_uniforms(const nir_src *src, int component,
+                       uint32_t *uni_offsets, unsigned *num_offsets)
 {
    if (!src->is_ssa)
       return false;
 
+   assert(component < src->ssa->num_components);
+
    nir_instr *instr = src->ssa->parent_instr;
 
    switch (instr->type) {
    case nir_instr_type_alu: {
-      /* Return true if all sources return true. */
-      /* TODO: Swizzles are ignored, so vectors can prevent inlining. */
       nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      /* Vector ops only need to check the corresponding component. */
+      if (nir_op_is_vec(alu->op)) {
+         nir_alu_src *alu_src = alu->src + component;
+         return src_only_uses_uniforms(&alu_src->src, alu_src->swizzle[0],
+                                       uni_offsets, num_offsets);
+      }
+
+      /* Return true if all sources return true. */
       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
-         if (!src_only_uses_uniforms(&alu->src[i].src, uni_offsets,
-                                     num_offsets))
-            return false;
+         nir_alu_src *alu_src = alu->src + i;
+         int input_sizes = nir_op_infos[alu->op].input_sizes[i];
+
+         if (input_sizes == 0) {
+            /* For ops which have no input size, each component of the
+             * dest is determined only by the same component of the srcs.
+             */
+            if (!src_only_uses_uniforms(&alu_src->src,
+                                        alu_src->swizzle[component],
+                                        uni_offsets, num_offsets))
+               return false;
+         } else {
+            /* For ops which have an input size, all components of the
+             * dest are determined by all components of the srcs (except
+             * vec ops).
+             */
+            for (unsigned j = 0; j < input_sizes; j++) {
+               if (!src_only_uses_uniforms(&alu_src->src, alu_src->swizzle[j],
+                                           uni_offsets, num_offsets))
+                  return false;
+            }
+         }
       }
       return true;
    }
@@ -74,11 +100,9 @@ src_only_uses_uniforms(const nir_src *src, uint32_t *uni_offsets,
          nir_src_as_uint(intr->src[0]) == 0 &&
          nir_src_is_const(intr->src[1]) &&
          nir_src_as_uint(intr->src[1]) <= MAX_OFFSET &&
-         /* TODO: Can't handle vectors and other bit sizes for now. */
-         /* UBO loads should be scalarized. */
-         intr->dest.ssa.num_components == 1 &&
+         /* TODO: Can't handle other bit sizes for now. */
          intr->dest.ssa.bit_size == 32) {
-         uint32_t offset = nir_src_as_uint(intr->src[1]);
+         uint32_t offset = nir_src_as_uint(intr->src[1]) + component * 4;
          assert(offset < MAX_OFFSET);
 
          /* Already recorded by another one */
@@ -112,6 +136,8 @@ add_inlinable_uniforms(const nir_src *cond, uint32_t *uni_offsets,
                        unsigned *num_offsets)
 {
    unsigned new_num = *num_offsets;
+   /* The if condition SSA is always scalar, so the component is 0. */
+   unsigned component = 0;
 
    /* Only update the uniform number when all uniforms in the expression
    * can be inlined. Partially inlined uniforms can't lower the if/loop.
@@ -133,7 +159,7 @@ add_inlinable_uniforms(const nir_src *cond, uint32_t *uni_offsets,
     * unless uniform0, uniform1 and uniform2 can be inlined at once,
     * can the loop be unrolled.
     */
-   if (src_only_uses_uniforms(cond, uni_offsets, &new_num))
+   if (src_only_uses_uniforms(cond, component, uni_offsets, &new_num))
       *num_offsets = new_num;
 }
 
@@ -193,20 +219,64 @@ nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
                nir_src_is_const(intr->src[0]) &&
                nir_src_as_uint(intr->src[0]) == 0 &&
                nir_src_is_const(intr->src[1]) &&
-               /* TODO: Can't handle vectors and other bit sizes for now. */
-               /* UBO loads should be scalarized. */
-               intr->dest.ssa.num_components == 1 &&
+               /* TODO: Can't handle other bit sizes for now. */
                intr->dest.ssa.bit_size == 32) {
-               uint64_t offset = nir_src_as_uint(intr->src[1]);
-
-               for (unsigned i = 0; i < num_uniforms; i++) {
-                  if (offset == uniform_dw_offsets[i] * 4) {
-                     b.cursor = nir_before_instr(&intr->instr);
-                     nir_ssa_def *def = nir_imm_int(&b, uniform_values[i]);
-                     nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
-                     nir_instr_remove(&intr->instr);
-                     break;
+               int num_components = intr->dest.ssa.num_components;
+               uint32_t offset = nir_src_as_uint(intr->src[1]) / 4;
+
+               if (num_components == 1) {
+                  /* Just replace the uniform load with a constant load. */
+                  for (unsigned i = 0; i < num_uniforms; i++) {
+                     if (offset == uniform_dw_offsets[i]) {
+                        b.cursor = nir_before_instr(&intr->instr);
+                        nir_ssa_def *def = nir_imm_int(&b, uniform_values[i]);
+                        nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
+                        nir_instr_remove(&intr->instr);
+                        break;
+                     }
+                  }
+               } else {
+                  /* Lower the vector uniform load to scalars and replace
+                   * each found component load with a constant load.
+                   */
+                  uint32_t max_offset = offset + num_components;
+                  nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS] = {0};
+                  bool found = false;
+
+                  b.cursor = nir_before_instr(&intr->instr);
+
+                  /* Find the components to replace. */
+                  for (unsigned i = 0; i < num_uniforms; i++) {
+                     uint32_t uni_offset = uniform_dw_offsets[i];
+                     if (uni_offset >= offset && uni_offset < max_offset) {
+                        int index = uni_offset - offset;
+                        components[index] = nir_imm_int(&b, uniform_values[i]);
+                        found = true;
+                     }
                   }
+
+                  if (!found)
+                     continue;
+
+                  /* Create per-component uniform loads for the rest. */
+                  for (unsigned i = 0; i < num_components; i++) {
+                     if (!components[i]) {
+                        uint32_t scalar_offset = (offset + i) * 4;
+                        components[i] = nir_load_ubo(&b, 1,
+                                                     intr->dest.ssa.bit_size,
+                                                     intr->src[0].ssa,
+                                                     nir_imm_int(&b, scalar_offset));
+                        nir_intrinsic_instr *load =
+                           nir_instr_as_intrinsic(components[i]->parent_instr);
+                        nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
+                                                scalar_offset);
+                        nir_intrinsic_set_range_base(load, scalar_offset);
+                        nir_intrinsic_set_range(load, 4);
+                     }
+                  }
+
+                  /* Replace the original uniform load. */
+                  nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                                           nir_vec(&b, components,
+                                                   num_components));
+                  nir_instr_remove(&intr->instr);
+               }
             }
          }
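
To make the new lowering concrete, here is a schematic before/after sketch.
This is pseudo-IR, not exact nir_print output, and the UBO block, byte
offsets, and the inlined value 0 are invented for illustration: a vec4 UBO
load whose .y component gates an if. Before this change the pass bailed out
because the load was not scalar; now the load is split so that .y can become
an immediate, which lets later constant folding remove the branch.

    /* before: the whole vector blocks inlining */
    vec4 32 ssa_2 = load_ubo(block=0, offset=16)     /* u.xyzw        */
    if (ssa_2.y != 0) { ... }

    /* after: scalarized, with the inlinable component replaced */
    32 ssa_3 = load_const(0x00000000)                /* inlined u.y   */
    32 ssa_4 = load_ubo(block=0, offset=16)          /* u.x           */
    32 ssa_5 = load_ubo(block=0, offset=24)          /* u.z           */
    32 ssa_6 = load_ubo(block=0, offset=28)          /* u.w           */
    vec4 32 ssa_7 = vec4(ssa_4, ssa_3, ssa_5, ssa_6)
    if (ssa_7.y != 0) { ... }                        /* now foldable  */

Any of the remaining scalar loads that end up unused are cleaned up by dead
code elimination afterwards.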
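
For context, a driver consumes this pass in two steps: nir_find_inlinable_uniforms()
records up to MAX_INLINABLE_UNIFORMS dword offsets in shader->info at compile
time, and at draw time the shader is re-specialized against the values
currently bound. Below is a minimal sketch of the second step, assuming the
nir_inline_uniforms() signature visible in the diff; the helper name and the
constbuf_dwords mapping of UBO slot 0 are hypothetical stand-ins for
driver-specific code.

    #include "nir.h"

    /* Hypothetical driver helper: clone the shader and inline the uniform
     * values currently bound in constant buffer 0.
     */
    static nir_shader *
    specialize_with_inlined_uniforms(const nir_shader *shader,
                                     const uint32_t *constbuf_dwords)
    {
       /* Work on a clone so the original can be re-specialized later with
        * different uniform values.
        */
       nir_shader *clone = nir_shader_clone(NULL, shader);

       /* Dword offsets recorded earlier by nir_find_inlinable_uniforms(). */
       const unsigned num = clone->info.num_inlinable_uniforms;
       uint32_t values[MAX_INLINABLE_UNIFORMS];

       for (unsigned i = 0; i < num; i++)
          values[i] = constbuf_dwords[clone->info.inlinable_uniform_dw_offsets[i]];

       nir_inline_uniforms(clone, num, values,
                           clone->info.inlinable_uniform_dw_offsets);

       /* Run constant folding / DCE next so the inlined values actually
        * simplify control flow.
        */
       return clone;
    }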
