Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/intel/compiler/brw_eu_defines.h | 2 + src/intel/compiler/brw_shader.cpp | 5 + src/intel/compiler/brw_vec4.cpp | 7 ++ src/intel/compiler/brw_vec4.h | 8 ++ src/intel/compiler/brw_vec4_generator.cpp | 136 +++++++++++++++++++++++++++ src/intel/compiler/brw_vec4_reg_allocate.cpp | 6 +- src/intel/compiler/brw_vec4_visitor.cpp | 49 ++++++++++ 7 files changed, 212 insertions(+), 1 deletion(-)
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 1af835d47e..3c148de0fa 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -436,6 +436,8 @@ enum opcode { VEC4_OPCODE_PICK_HIGH_32BIT, VEC4_OPCODE_SET_LOW_32BIT, VEC4_OPCODE_SET_HIGH_32BIT, + VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW, + VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH, FS_OPCODE_DDX_COARSE, FS_OPCODE_DDX_FINE, diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 53d0742d2e..248feacbd2 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -296,6 +296,11 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case FS_OPCODE_PACK: return "pack"; + + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + return "gen4_scratch_read_1word_low"; + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: + return "gen4_scratch_read_1word_high"; case SHADER_OPCODE_GEN4_SCRATCH_READ: return "gen4_scratch_read"; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index b443effca9..b6d409eea2 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -259,6 +259,8 @@ bool vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo) { switch (opcode) { + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: case SHADER_OPCODE_GEN4_SCRATCH_READ: case VEC4_OPCODE_DOUBLE_TO_F32: case VEC4_OPCODE_DOUBLE_TO_D32: @@ -335,6 +337,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) return 1; case VS_OPCODE_PULL_CONSTANT_LOAD: return 2; + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: + return 1; case SHADER_OPCODE_GEN4_SCRATCH_READ: return 2; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: @@ -2091,6 +2096,8 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, { /* Do not split some instructions that require special handling */ switch (inst->opcode) { + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: case SHADER_OPCODE_GEN4_SCRATCH_READ: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: return inst->exec_size; diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index d828da02ea..a5b45aca21 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -214,6 +214,9 @@ public: enum brw_conditional_mod condition); vec4_instruction *IF(enum brw_predicate predicate); EMIT1(SCRATCH_READ) + vec4_instruction *DF_IVB_SCRATCH_READ(const dst_reg &dst, const src_reg &src0, + bool low); + EMIT2(SCRATCH_WRITE) EMIT3(LRP) EMIT1(BFREV) @@ -294,6 +297,11 @@ public: dst_reg dst, src_reg orig_src, int base_offset); + void emit_1grf_df_ivb_scratch_read(bblock_t *block, + vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset, bool first_grf); + void emit_scratch_write(bblock_t *block, vec4_instruction *inst, int base_offset); void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 334933d15a..3bb931385a 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1133,6 +1133,73 @@ generate_unpack_flags(struct brw_codegen *p, } static void +generate_scratch_read_1oword(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index, + bool low) +{ + const struct gen_device_info *devinfo = p->devinfo; + + assert(devinfo->gen >= 7 && inst->exec_size == 4 && + type_sz(dst.type) == 8); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + + if (!low) { + /* Read second GRF (offset in OWORDs) */ + for (int i = 0; i < 2; i++) { + brw_oword_block_read_scratch(p, + dst, + brw_message_reg(inst->base_mrf), + 1, 32*inst->offset + 16*i + 32, false, true); + if (i == 0) { + /* The scratch read message writes the 128 MSB (OWORD1 HIGH) of + * the destination. We need to move them to dst.0 so we can + * read the pending 128 bits without using a temporary register. + */ + brw_set_default_exec_size(p, BRW_EXECUTE_4); + struct brw_reg tmp = + stride(suboffset(dst, 16 / type_sz(dst.type)), + 4, 4, 1); + + brw_set_default_mask_control(p, true); + brw_MOV(p, dst, tmp); + brw_set_default_mask_control(p, inst->force_writemask_all); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + } + } + } else { + /* Read first GRF (offset in OWORDs) */ + for (int i = 1; i >= 0; i--) { + brw_oword_block_read_scratch(p, + dst, + brw_message_reg(inst->base_mrf), + 1, 32*inst->offset + 16*i, true, false); + + if (i == 1) { + /* The scratch read message writes the 128 LSB (OWORD1 LOW) of + * the destination. We need to move them to dst.4 so we can + * read the pending 128 bits without using a temporary register. + */ + struct brw_reg tmp = stride(dst, 4, 4, 1); + brw_set_default_exec_size(p, BRW_EXECUTE_4); + brw_set_default_mask_control(p, true); + brw_MOV(p, + suboffset(dst, 16 / type_sz(dst.type)), + tmp); + brw_set_default_mask_control(p, inst->force_writemask_all); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + } + } + } + + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); + brw_set_default_access_mode(p, BRW_ALIGN_16); + return; +} + +static void generate_scratch_read(struct brw_codegen *p, vec4_instruction *inst, struct brw_reg dst, @@ -1143,6 +1210,16 @@ generate_scratch_read(struct brw_codegen *p, gen6_resolve_implied_move(p, &header, inst->base_mrf); + if (devinfo->gen >= 7 && inst->exec_size == 4 && + type_sz(dst.type) == 8) { + /* First read second GRF (offset in OWORDs) */ + struct brw_reg dst_high = suboffset(dst, 32 / type_sz(dst.type)); + generate_scratch_read_1oword(p, inst, dst_high, index, false); + /* Now read first GRF (data from first vertex) */ + generate_scratch_read_1oword(p, inst, dst, index, true); + return; + } + generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), index); @@ -1192,6 +1269,57 @@ generate_scratch_write(struct brw_codegen *p, struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; + if (devinfo->gen >= 7 && inst->exec_size == 4 && + type_sz(src.type) == 8) { + brw_set_default_access_mode(p, BRW_ALIGN_1); + + /* The messages only works with group == 0, we use the group to know which + * message emit (1-OWORD LOW or 1-OWORD HIGH). + */ + brw_set_default_group(p, 0); + + if (inst->group == 0) { + for (int i = 0; i < 2; i++) { + brw_set_default_exec_size(p, BRW_EXECUTE_4); + brw_set_default_mask_control(p, true); + struct brw_reg temp = + retype(suboffset(src, i * 16 / type_sz(src.type)), BRW_REGISTER_TYPE_UD); + temp = stride(temp, 4, 4, 1); + + brw_MOV(p, brw_uvec_mrf(4, inst->base_mrf + 1, 0), + temp); + brw_set_default_mask_control(p, inst->force_writemask_all); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + + /* Offset in OWORDs */ + brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), + 1, 32*inst->offset + 16*i, true, false); + } + } else { + for (int i = 0; i < 2; i++) { + brw_set_default_exec_size(p, BRW_EXECUTE_4); + + brw_set_default_mask_control(p, true); + struct brw_reg temp = + retype(suboffset(src, i * 16 / type_sz(src.type)), BRW_REGISTER_TYPE_UD); + temp = stride(temp, 4, 4, 1); + + brw_MOV(p, brw_uvec_mrf(4, inst->base_mrf + 1, 4), + temp); + + brw_set_default_mask_control(p, inst->force_writemask_all); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + + /* Offset in OWORDs */ + brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), + 1, 32*inst->offset + 16*i + 32, false, true); + } + } + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); + brw_set_default_access_mode(p, BRW_ALIGN_16); + return; + } + /* If the instruction is predicated, we'll predicate the send, not * the header setup. */ @@ -1780,6 +1908,14 @@ generate_code(struct brw_codegen *p, generate_vs_urb_write(p, inst); break; + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + generate_scratch_read_1oword(p, inst, dst, src[0], true); + fill_count++; + break; + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: + generate_scratch_read_1oword(p, inst, dst, src[0], false); + fill_count++; + break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(p, inst, dst, src[0]); fill_count++; diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp index a0ba77b867..ec5ba10e86 100644 --- a/src/intel/compiler/brw_vec4_reg_allocate.cpp +++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp @@ -332,7 +332,9 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i, * reusing scratch_reg for this instruction. */ if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE || - prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ) + prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ || + prev_inst->opcode == VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW || + prev_inst->opcode == VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH) continue; /* If the previous instruction does not write to scratch_reg, then check @@ -467,6 +469,8 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) loop_scale /= 10; break; + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW: + case VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH: case SHADER_OPCODE_GEN4_SCRATCH_READ: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: for (int i = 0; i < 3; i++) { diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index 22ee4dd1c4..37ae31c0d5 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -264,6 +264,24 @@ vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index) } vec4_instruction * +vec4_visitor::DF_IVB_SCRATCH_READ(const dst_reg &dst, + const src_reg &index, + bool first_grf) +{ + vec4_instruction *inst; + enum opcode op = first_grf ? + VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW : + VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH; + + inst = new(mem_ctx) vec4_instruction(op, + dst, index); + inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; + inst->mlen = 1; + + return inst; +} + +vec4_instruction * vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src, const src_reg &index) { @@ -1472,6 +1490,37 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst, /** * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. This instruction only reads + * DF value on IVB, one GRF each time. + * + * @base_offset is measured in 32-byte units (the size of a register). + * @first_grf indicates if we want to read first vertex data (true) or + * the second (false). + */ +void +vec4_visitor::emit_1grf_df_ivb_scratch_read(bblock_t *block, + vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset, bool first_grf) +{ + assert(orig_src.offset % REG_SIZE == 0); + src_reg index = get_scratch_offset(block, inst, 0, base_offset); + + assert(devinfo->gen == 7 && !devinfo->is_haswell && type_sz(temp.type) == 8); + temp.offset = 0; + vec4_instruction *read = DF_IVB_SCRATCH_READ(temp, index, first_grf); + read->exec_size = 4; + /* The instruction will use group 0 but a different message depending of the + * vertex data to load. + */ + read->group = 0; + read->offset = base_offset; + read->size_written = 1; + emit_before(block, inst, read); +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src * from scratch space at @base_offset to @temp. * * @base_offset is measured in 32-byte units (the size of a register). -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev