Hi Eric,
I'm working on lima gp compiler which should benefit from nir lowering
uniform load to scalar.
I notice you write the nir_lower_io_to_scalar.c which support lowering
shader_in/shader_out
but left the uniform lowering in vc4 driver, any reason why not
implement in the nir_lower_io_to_scalar.c?
I'm new to nir, tried to add it but seems not correct after
optimization pass. So I should missing
some place, anyone can help to point out?
Thanks,
Qiang
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
index 3a519a73..93bf0fc 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -392,8 +392,8 @@ BARYCENTRIC(at_offset, 1, 2)
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
-/* src[] = { offset }. const_index[] = { base, range } */
-LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, range, component } */
+LOAD(uniform, 1, 2, BASE, RANGE, COMPONENT, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { buffer_index, offset }. No const_index */
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { offset }. const_index[] = { base, component } */
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 1ae2cc7..566f8d0 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -208,7 +208,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
load->num_components = intrin->num_components;
nir_intrinsic_set_base(load, var->data.driver_location);
- if (mode == nir_var_shader_in || mode == nir_var_shader_out)
+ if (mode == nir_var_shader_in || mode == nir_var_shader_out || mode == nir_var_uniform)
nir_intrinsic_set_component(load, component);
if (load->intrinsic == nir_intrinsic_load_uniform)
diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c b/src/compiler/nir/nir_lower_io_to_scalar.c
index f2345d5..d838770 100644
--- a/src/compiler/nir/nir_lower_io_to_scalar.c
+++ b/src/compiler/nir/nir_lower_io_to_scalar.c
@@ -26,8 +26,8 @@
/** @file nir_lower_io_to_scalar.c
*
- * Replaces nir_load_input/nir_store_output operations with num_components !=
- * 1 with individual per-channel operations.
+ * Replaces nir_load_input/nir_store_output/nir_load_uniform operations
+ * with num_components != 1 with individual per-channel operations.
*/
static void
@@ -63,6 +63,39 @@ lower_load_input_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
}
static void
+lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+
+ assert(intr->dest.is_ssa);
+
+ nir_ssa_def *loads[4];
+
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *chan_intr =
+ nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+ nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
+ 1, intr->dest.ssa.bit_size, NULL);
+ chan_intr->num_components = 1;
+
+ nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
+ nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i);
+ nir_intrinsic_set_range(chan_intr, nir_intrinsic_range(intr));
+ /* offset */
+ nir_src_copy(&chan_intr->src[0], &intr->src[0], chan_intr);
+
+ nir_builder_instr_insert(b, &chan_intr->instr);
+
+ loads[i] = &chan_intr->dest.ssa;
+ }
+
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+ nir_src_for_ssa(nir_vec(b, loads,
+ intr->num_components)));
+ nir_instr_remove(&intr->instr);
+}
+
+static void
lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
@@ -119,6 +152,10 @@ nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask)
if (mask & nir_var_shader_out)
lower_store_output_to_scalar(&b, intr);
break;
+ case nir_intrinsic_load_uniform:
+ if (mask & nir_var_uniform)
+ lower_load_uniform_to_scalar(&b, intr);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c
index 1f6b91d..e71c0a0 100644
--- a/src/gallium/drivers/lima/ir/gp/nir.c
+++ b/src/gallium/drivers/lima/ir/gp/nir.c
@@ -123,8 +123,8 @@ static gpir_node *gpir_emit_intrinsic(gpir_compiler *comp, nir_intrinsic_instr *
if (!lnode)
return NULL;
- lnode->index = instr->const_index[info->index_map[NIR_INTRINSIC_BASE] - 1];
- lnode->component = instr->const_index[info->index_map[NIR_INTRINSIC_COMPONENT] - 1];
+ lnode->index = nir_intrinsic_base(instr);
+ lnode->component = nir_intrinsic_component(instr);
return &lnode->node;
@@ -133,8 +133,8 @@ static gpir_node *gpir_emit_intrinsic(gpir_compiler *comp, nir_intrinsic_instr *
if (!snode)
return NULL;
- snode->index = instr->const_index[info->index_map[NIR_INTRINSIC_BASE] - 1];
- snode->component = instr->const_index[info->index_map[NIR_INTRINSIC_COMPONENT] - 1];
+ snode->index = nir_intrinsic_base(instr);
+ snode->component = nir_intrinsic_component(instr);
child = gpir_node_find(comp, instr->src);
snode->child = child;
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 68cfeb5..8ba6511 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -63,7 +63,8 @@ lima_program_optimize_nir(struct nir_shader *s)
bool progress;
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
- NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in|nir_var_shader_out);
+ NIR_PASS_V(s, nir_lower_io_to_scalar,
+ nir_var_shader_in|nir_var_shader_out|nir_var_uniform);
do {
progress = false;
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev