Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com>
---
 src/intel/compiler/brw_fs.cpp                  |  3 ++-
 src/intel/compiler/brw_fs.h                    |  3 ++-
 src/intel/compiler/brw_fs_builder.h            | 25 ++++++++++++++++++-------
 src/intel/compiler/brw_fs_copy_propagation.cpp |  1 +
 src/intel/compiler/brw_fs_nir.cpp              |  9 +++++++--
 src/intel/compiler/brw_ir_fs.h                 |  3 +++
 6 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index cedfde5096..9c3410b698 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -440,6 +440,7 @@ fs_reg::fs_reg(struct ::brw_reg reg) : { this->offset = 0; this->stride = 1; + this->pad_per_component = 0; if (this->file == IMM && (this->type != BRW_REGISTER_TYPE_V && this->type != BRW_REGISTER_TYPE_UV && @@ -467,7 +468,7 @@ fs_reg::component_size(unsigned width) const const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride : hstride == 0 ? 0 : 1 << (hstride - 1)); - return MAX2(width * stride, 1) * type_sz(type); + return (MAX2(width * stride, 1) * (type_sz(type)) + pad_per_component); } /** diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 30557324d5..d9c4f737e6 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -231,7 +231,8 @@ public: nir_jump_instr *instr); fs_reg get_nir_src(const nir_src &src); fs_reg get_nir_src_imm(const nir_src &src); - fs_reg get_nir_dest(const nir_dest &dest); + fs_reg get_nir_dest(const nir_dest &dest, + bool pad_components_to_full_registers = false); fs_reg get_nir_image_deref(const nir_deref_var *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 633086c64b..804d52e5df 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -182,17 +182,28 @@ namespace brw { * component in this IR). 
*/ dst_reg - vgrf(enum brw_reg_type type, unsigned n = 1) const + vgrf(enum brw_reg_type type, + unsigned n = 1, + bool pad_components_to_full_registers = false) const { assert(dispatch_width() <= 32); - if (n > 0) - return dst_reg(VGRF, shader->alloc.allocate( - DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), - REG_SIZE)), - type); - else + if (n == 0) return retype(null_reg_ud(), type); + + const unsigned pad_per_component = + (pad_components_to_full_registers && + type_sz(type) == 2 && + dispatch_width() == 8) ? (REG_SIZE / 2) : 0; + const unsigned size = + n * ((type_sz(type) * dispatch_width()) + pad_per_component); + const unsigned nr = shader->alloc.allocate( + DIV_ROUND_UP(size, REG_SIZE)); + + dst_reg dst = dst_reg(VGRF, nr, type); + dst.pad_per_component = pad_per_component; + + return dst; } /** diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index ed2511ecfa..637a1de6ae 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -447,6 +447,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].file = entry->src.file; inst->src[arg].nr = entry->src.nr; inst->src[arg].stride *= entry->src.stride; + inst->src[arg].pad_per_component = entry->src.pad_per_component; inst->saturate = inst->saturate || entry->saturate; /* Compute the offset of inst->src[arg] relative to entry->dst */ diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 16e8dfc186..35e78b134a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -357,6 +357,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) unsigned size = array_elems * reg->num_components; const brw_reg_type reg_type = brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F); + + /* TODO: Consider if 16-bit component padding is needed. 
*/ + nir_locals[reg->index] = bld.vgrf(reg_type, size); } @@ -1602,13 +1605,15 @@ fs_visitor::get_nir_src_imm(const nir_src &src) } fs_reg -fs_visitor::get_nir_dest(const nir_dest &dest) +fs_visitor::get_nir_dest(const nir_dest &dest, + bool pad_components_to_full_registers) { if (dest.is_ssa) { const brw_reg_type reg_type = brw_reg_type_from_bit_size(dest.ssa.bit_size, BRW_REGISTER_TYPE_F); nir_ssa_values[dest.ssa.index] = - bld.vgrf(reg_type, dest.ssa.num_components); + bld.vgrf(reg_type, dest.ssa.num_components, + pad_components_to_full_registers); return nir_ssa_values[dest.ssa.index]; } else { /* We don't handle indirects on locals */ diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index cd603630a4..b4a1d7ef5a 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -51,6 +51,9 @@ public: /** Register region horizontal stride */ uint8_t stride; + + /* Needed, for example, for SIMD8 half float payloads. */ + uint8_t pad_per_component; }; static inline fs_reg -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev