This is needed for RADV to support explicit component packing.

This is also required to use the new NIR component splitting /
packing passes.

V2:
 - add component packing support for interpolate_at* intrinsics
 - improve store packing support when not all varyings are scalar;
   as spotted by Bas, the store source was incorrectly offset.
---
 src/amd/common/ac_nir_to_llvm.c | 68 +++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2e50e50b12..5d9c5be7d2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1060,21 +1060,20 @@ static int get_llvm_num_components(LLVMValueRef value)
                                      : 1;
        return num_components;
 }
 
 static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
                                      LLVMValueRef value,
                                      int index)
 {
        int count = get_llvm_num_components(value);
 
-       assert(index < count);
        if (count == 1)
                return value;
 
        return LLVMBuildExtractElement(ac->builder, value,
                                       LLVMConstInt(ac->i32, index, false), "");
 }
 
 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
                                 LLVMValueRef value, unsigned count)
 {
@@ -2811,20 +2810,42 @@ get_dw_address(struct nir_to_llvm_context *ctx,
        dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
                               LLVMConstInt(ctx->i32, param * 4, false), "");
 
        if (const_index && compact_const_index)
                dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
                                       LLVMConstInt(ctx->i32, const_index, 
false), "");
        return dw_addr;
 }
 
 static LLVMValueRef
+build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+                           unsigned value_count, unsigned component)
+{
+       LLVMValueRef vec = NULL;
+       /* Gather values[component .. component + value_count - 1] into one vector. */
+       if (value_count == 1) {
+               return values[component];
+       } else if (!value_count)
+               unreachable("value_count is 0");
+       /* values[] is indexed by absolute component; the result vector is 0-based. */
+       for (unsigned i = component; i < value_count + component; i++) {
+               LLVMValueRef value = values[i];
+               /* Create the undef vector on the first element, even when component > 0. */
+               if (i == component)
+                       vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
+               LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
+               vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
+       }
+       return vec;
+}
+
+static LLVMValueRef
 load_tcs_input(struct nir_to_llvm_context *ctx,
               nir_intrinsic_instr *instr)
 {
        LLVMValueRef dw_addr, stride;
        unsigned const_index;
        LLVMValueRef vertex_index;
        LLVMValueRef indir_index;
        unsigned param;
        LLVMValueRef value[4], result;
        const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
@@ -2832,26 +2853,27 @@ load_tcs_input(struct nir_to_llvm_context *ctx,
        param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
        get_deref_offset(ctx->nir, instr->variables[0],
                         false, NULL, per_vertex ? &vertex_index : NULL,
                         &const_index, &indir_index);
 
        stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
        dw_addr = get_tcs_in_current_patch_offset(ctx);
        dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, 
vertex_index, stride,
                                 indir_index);
 
-       for (unsigned i = 0; i < instr->num_components; i++) {
+       unsigned comp = instr->variables[0]->var->data.location_frac;
+       for (unsigned i = 0; i < instr->num_components + comp; i++) {
                value[i] = lds_load(ctx, dw_addr);
                dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
                                       ctx->i32one, "");
        }
-       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+       result = build_varying_gather_values(&ctx->ac, value, 
instr->num_components, comp);
        result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
        return result;
 }
 
 static LLVMValueRef
 load_tcs_output(struct nir_to_llvm_context *ctx,
               nir_intrinsic_instr *instr)
 {
        LLVMValueRef dw_addr;
        LLVMValueRef stride = NULL;
@@ -2870,43 +2892,45 @@ load_tcs_output(struct nir_to_llvm_context *ctx,
        if (!instr->variables[0]->var->data.patch) {
                stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
        }
 
        dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, 
vertex_index, stride,
                                 indir_index);
 
-       for (unsigned i = 0; i < instr->num_components; i++) {
+       unsigned comp = instr->variables[0]->var->data.location_frac;
+       for (unsigned i = comp; i < instr->num_components + comp; i++) {
                value[i] = lds_load(ctx, dw_addr);
                dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
                                       ctx->i32one, "");
        }
-       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+       result = build_varying_gather_values(&ctx->ac, value, 
instr->num_components, comp);
        result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
        return result;
 }
 
 static void
 store_tcs_output(struct nir_to_llvm_context *ctx,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef src,
                 unsigned writemask)
 {
        LLVMValueRef dw_addr;
        LLVMValueRef stride = NULL;
        LLVMValueRef buf_addr = NULL;
        LLVMValueRef vertex_index = NULL;
        LLVMValueRef indir_index = NULL;
        unsigned const_index = 0;
        unsigned param;
+       const unsigned comp = instr->variables[0]->var->data.location_frac;
        const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, 
ctx->stage);
        const bool is_compact = instr->variables[0]->var->data.compact;
 
        get_deref_offset(ctx->nir, instr->variables[0],
                         false, NULL, per_vertex ? &vertex_index : NULL,
                         &const_index, &indir_index);
 
        param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
        if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 
&&
            is_compact && const_index > 3) {
@@ -2930,21 +2954,21 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
 
        bool is_tess_factor = false;
        if (instr->variables[0]->var->data.location == 
VARYING_SLOT_TESS_LEVEL_INNER ||
            instr->variables[0]->var->data.location == 
VARYING_SLOT_TESS_LEVEL_OUTER)
                is_tess_factor = true;
 
        unsigned base = is_compact ? const_index : 0;
        for (unsigned chan = 0; chan < 8; chan++) {
                if (!(writemask & (1 << chan)))
                        continue;
-               LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan);
+               LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - 
comp);
 
                lds_store(ctx, dw_addr, value);
 
                if (!is_tess_factor && writemask != 0xF)
                        ac_build_buffer_store_dword(&ctx->ac, 
ctx->hs_ring_tess_offchip, value, 1,
                                                    buf_addr, ctx->oc_lds,
                                                    4 * (base + chan), 1, 0, 
true, false);
 
                dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
                                       ctx->i32one, "");
@@ -2972,23 +2996,28 @@ load_tes_input(struct nir_to_llvm_context *ctx,
 
        get_deref_offset(ctx->nir, instr->variables[0],
                         false, NULL, per_vertex ? &vertex_index : NULL,
                         &const_index, &indir_index);
        param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
        if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 
&&
            is_compact && const_index > 3) {
                const_index -= 3;
                param++;
        }
+
+       unsigned comp = instr->variables[0]->var->data.location_frac;
        buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
                                                     is_compact, vertex_index, 
indir_index);
 
+       LLVMValueRef comp_offset = LLVMConstInt(ctx->i32, comp * 4, false);
+       buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
+
        result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, 
instr->num_components, NULL,
                                      buf_addr, ctx->oc_lds, is_compact ? (4 * 
const_index) : 0, 1, 0, true, false);
        result = trim_vector(&ctx->ac, result, instr->num_components);
        result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, 
&instr->dest.ssa), "");
        return result;
 }
 
 static LLVMValueRef
 load_gs_input(struct nir_to_llvm_context *ctx,
              nir_intrinsic_instr *instr)
@@ -3001,21 +3030,23 @@ load_gs_input(struct nir_to_llvm_context *ctx,
        unsigned vertex_index;
        get_deref_offset(ctx->nir, instr->variables[0],
                         false, &vertex_index, NULL,
                         &const_index, &indir_index);
        vtx_offset_param = vertex_index;
        assert(vtx_offset_param < 6);
        vtx_offset = LLVMBuildMul(ctx->builder, 
ctx->gs_vtx_offset[vtx_offset_param],
                                  LLVMConstInt(ctx->i32, 4, false), "");
 
        param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
-       for (unsigned i = 0; i < instr->num_components; i++) {
+
+       unsigned comp = instr->variables[0]->var->data.location_frac;
+       for (unsigned i = comp; i < instr->num_components + comp; i++) {
                if (ctx->ac.chip_class >= GFX9) {
                        LLVMValueRef dw_addr = 
ctx->gs_vtx_offset[vtx_offset_param];
                        dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
                                               LLVMConstInt(ctx->ac.i32, param 
* 4 + i + const_index, 0), "");
                        value[i] = lds_load(ctx, dw_addr);
                } else {
                        args[0] = ctx->esgs_ring;
                        args[1] = vtx_offset;
                        args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + 
const_index) * 256, false);
                        args[3] = ctx->i32zero;
@@ -3024,21 +3055,21 @@ load_gs_input(struct nir_to_llvm_context *ctx,
                        args[6] = ctx->i32one; /* GLC */
                        args[7] = ctx->i32zero; /* SLC */
                        args[8] = ctx->i32zero; /* TFE */
 
                        value[i] = ac_build_intrinsic(&ctx->ac, 
"llvm.SI.buffer.load.dword.i32.i32",
                                                      ctx->i32, args, 9,
                                                      AC_FUNC_ATTR_READONLY |
                                                      AC_FUNC_ATTR_LEGACY);
                }
        }
-       result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
+       result = build_varying_gather_values(&ctx->ac, value, 
instr->num_components, comp);
 
        return result;
 }
 
 static LLVMValueRef
 build_gep_for_deref(struct ac_nir_context *ctx,
                    nir_deref_var *deref)
 {
        struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
deref->var);
        assert(entry->data);
@@ -3074,41 +3105,43 @@ build_gep_for_deref(struct ac_nir_context *ctx,
        }
        return val;
 }
 
 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
                                   nir_intrinsic_instr *instr)
 {
        LLVMValueRef values[8];
        int idx = instr->variables[0]->var->data.driver_location;
        int ve = instr->dest.ssa.num_components;
+       unsigned comp = instr->variables[0]->var->data.location_frac;
        LLVMValueRef indir_index;
        LLVMValueRef ret;
        unsigned const_index;
        bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
                     instr->variables[0]->var->data.mode == nir_var_shader_in;
        get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
                                      &const_index, &indir_index);
 
        if (instr->dest.ssa.bit_size == 64)
                ve *= 2;
 
        switch (instr->variables[0]->var->data.mode) {
        case nir_var_shader_in:
                if (ctx->stage == MESA_SHADER_TESS_CTRL)
                        return load_tcs_input(ctx->nctx, instr);
                if (ctx->stage == MESA_SHADER_TESS_EVAL)
                        return load_tes_input(ctx->nctx, instr);
                if (ctx->stage == MESA_SHADER_GEOMETRY) {
                        return load_gs_input(ctx->nctx, instr);
                }
-               for (unsigned chan = 0; chan < ve; chan++) {
+
+               for (unsigned chan = comp; chan < ve + comp; chan++) {
                        if (indir_index) {
                                unsigned count = glsl_count_attribute_slots(
                                                instr->variables[0]->var->type,
                                                ctx->stage == 
MESA_SHADER_VERTEX);
                                count -= chan / 4;
                                LLVMValueRef tmp_vec = 
ac_build_gather_values_extended(
                                                &ctx->ac, ctx->abi->inputs + 
idx + chan, count,
                                                4, false, true);
 
                                values[chan] = 
LLVMBuildExtractElement(ctx->ac.builder,
@@ -3140,21 +3173,22 @@ static LLVMValueRef visit_load_var(struct 
ac_nir_context *ctx,
                LLVMValueRef address = build_gep_for_deref(ctx,
                                                           instr->variables[0]);
                LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
                return LLVMBuildBitCast(ctx->ac.builder, val,
                                        get_def_type(ctx, &instr->dest.ssa),
                                        "");
        }
        case nir_var_shader_out:
                if (ctx->stage == MESA_SHADER_TESS_CTRL)
                        return load_tcs_output(ctx->nctx, instr);
-               for (unsigned chan = 0; chan < ve; chan++) {
+
+               for (unsigned chan = comp; chan < ve + comp; chan++) {
                        if (indir_index) {
                                unsigned count = glsl_count_attribute_slots(
                                                instr->variables[0]->var->type, 
false);
                                count -= chan / 4;
                                LLVMValueRef tmp_vec = 
ac_build_gather_values_extended(
                                                &ctx->ac, ctx->outputs + idx + 
chan, count,
                                                4, true, true);
 
                                values[chan] = 
LLVMBuildExtractElement(ctx->ac.builder,
                                                                       tmp_vec,
@@ -3162,32 +3196,33 @@ static LLVMValueRef visit_load_var(struct 
ac_nir_context *ctx,
                        } else {
                                values[chan] = LLVMBuildLoad(ctx->ac.builder,
                                                     ctx->outputs[idx + chan + 
const_index * 4],
                                                     "");
                        }
                }
                break;
        default:
                unreachable("unhandle variable mode");
        }
-       ret = ac_build_gather_values(&ctx->ac, values, ve);
+       ret = build_varying_gather_values(&ctx->ac, values, ve, comp);
        return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, 
&instr->dest.ssa), "");
 }
 
 static void
 visit_store_var(struct ac_nir_context *ctx,
                nir_intrinsic_instr *instr)
 {
        LLVMValueRef temp_ptr, value;
        int idx = instr->variables[0]->var->data.driver_location;
+       unsigned comp = instr->variables[0]->var->data.location_frac;
        LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
-       int writemask = instr->const_index[0];
+       int writemask = instr->const_index[0] << comp;
        LLVMValueRef indir_index;
        unsigned const_index;
        get_deref_offset(ctx, instr->variables[0], false,
                         NULL, NULL, &const_index, &indir_index);
 
        if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
                int old_writemask = writemask;
 
                src = LLVMBuildBitCast(ctx->ac.builder, src,
                                       LLVMVectorType(ctx->ac.f32, 
get_llvm_num_components(src) * 2),
@@ -3206,21 +3241,21 @@ visit_store_var(struct ac_nir_context *ctx,
                if (ctx->stage == MESA_SHADER_TESS_CTRL) {
                        store_tcs_output(ctx->nctx, instr, src, writemask);
                        return;
                }
 
                for (unsigned chan = 0; chan < 8; chan++) {
                        int stride = 4;
                        if (!(writemask & (1 << chan)))
                                continue;
 
-                       value = llvm_extract_elem(&ctx->ac, src, chan);
+                       value = llvm_extract_elem(&ctx->ac, src, chan - comp);
 
                        if (instr->variables[0]->var->data.compact)
                                stride = 1;
                        if (indir_index) {
                                unsigned count = glsl_count_attribute_slots(
                                                instr->variables[0]->var->type, 
false);
                                count -= chan / 4;
                                LLVMValueRef tmp_vec = 
ac_build_gather_values_extended(
                                                &ctx->ac, ctx->outputs + idx + 
chan, count,
                                                stride, true, true);
@@ -3907,21 +3942,21 @@ static LLVMValueRef load_sample_pos(struct 
ac_nir_context *ctx)
        LLVMValueRef values[2];
 
        values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
        values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
        return ac_build_gather_values(&ctx->ac, values, 2);
 }
 
 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
                                 const nir_intrinsic_instr *instr)
 {
-       LLVMValueRef result[2];
+       LLVMValueRef result[4];
        LLVMValueRef interp_param, attr_number;
        unsigned location;
        unsigned chan;
        LLVMValueRef src_c0 = NULL;
        LLVMValueRef src_c1 = NULL;
        LLVMValueRef src0 = NULL;
        int input_index = instr->variables[0]->var->data.location - 
VARYING_SLOT_VAR0;
        switch (instr->intrinsic) {
        case nir_intrinsic_interp_var_at_centroid:
                location = INTERP_CENTROID;
@@ -3985,42 +4020,43 @@ static LLVMValueRef visit_interp(struct 
nir_to_llvm_context *ctx,
                        temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
                        temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
 
                        ij_out[i] = LLVMBuildBitCast(ctx->builder,
                                                     temp2, ctx->i32, "");
                }
                interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
 
        }
 
-       for (chan = 0; chan < 2; chan++) {
+       for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
 
                if (interp_param) {
                        interp_param = LLVMBuildBitCast(ctx->builder,
                                                        interp_param, 
LLVMVectorType(ctx->f32, 2), "");
                        LLVMValueRef i = LLVMBuildExtractElement(
                                ctx->builder, interp_param, ctx->i32zero, "");
                        LLVMValueRef j = LLVMBuildExtractElement(
                                ctx->builder, interp_param, ctx->i32one, "");
 
                        result[chan] = ac_build_fs_interp(&ctx->ac,
                                                          llvm_chan, 
attr_number,
                                                          ctx->prim_mask, i, j);
                } else {
                        result[chan] = ac_build_fs_interp_mov(&ctx->ac,
                                                              
LLVMConstInt(ctx->i32, 2, false),
                                                              llvm_chan, 
attr_number,
                                                              ctx->prim_mask);
                }
        }
-       return ac_build_gather_values(&ctx->ac, result, 2);
+       return build_varying_gather_values(&ctx->ac, result, 
instr->num_components,
+                                          
instr->variables[0]->var->data.location_frac);
 }
 
 static void
 visit_emit_vertex(struct nir_to_llvm_context *ctx,
                  const nir_intrinsic_instr *instr)
 {
        LLVMValueRef gs_next_vertex;
        LLVMValueRef can_emit, kill;
        int idx;
 
-- 
2.13.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to