Change the FS generator to ask the dataport for enough owords' worth of constants to fill the execution size of the instruction. This means that the visitor now needs to set the execution size correctly for uniform pull constant load instructions, which we had been neglecting until now. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 15 +++++++------- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 ++++++++++++-------------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 +++++---- 4 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6141bfb..8536a13 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port constant cache. + * Read float[4] vectors from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_DATAPORT_READ_TARGET_DATA_CACHE); + const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); /* On newer hardware, offset is in units of owords. */ if (devinfo->gen >= 6) @@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ @@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p, mrf.nr, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(offset)); + brw_pop_insn_state(p); brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); @@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dp_read_message(p, insn, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, target_cache, 1, /* 
msg_length */ true, /* header_present */ - 1); /* response_length (1 reg, 2 owords!) */ + DIV_ROUND_UP(exec_size, 8)); /* response_length */ brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 819d256..b6a571a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads() assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(8, 0); + const fs_builder ubld = ibld.exec_all().group(4, 0); struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, brw_imm_ud(index), offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 24bec5f..e73f2ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { + assert(type_sz(dst.type) == 4); assert(inst->mlen != 0); assert(index.file == BRW_IMMEDIATE_VALUE && @@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, { assert(index.type == BRW_REGISTER_TYPE_UD); assert(payload.file == BRW_GENERAL_REGISTER_FILE); + assert(type_sz(dst.type) == 4); if (index.file == BRW_IMMEDIATE_VALUE) { const uint32_t surf_index = index.ud; brw_push_insn_state(p); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4); brw_pop_insn_state(p); - brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD))); - brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD))); - brw_set_dp_read_message(p, send, - surf_index, - 
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); + brw_set_dp_read_message(p, send, surf_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size), GEN7_DATAPORT_DC_OWORD_BLOCK_READ, GEN6_SFID_DATAPORT_CONSTANT_CACHE, 1, /* mlen */ true, /* header */ - 1); /* rlen */ + DIV_ROUND_UP(inst->size_written, REG_SIZE)); } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, /* dst = send(payload, a0.0 | <descriptor>) */ brw_inst *insn = brw_send_indirect_message( p, GEN6_SFID_DATAPORT_CONSTANT_CACHE, - vec4(retype(dst, BRW_REGISTER_TYPE_UD)), - vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr); - brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4); - brw_set_dp_read_message(p, insn, - 0, /* surface */ - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + retype(dst, BRW_REGISTER_TYPE_UD), + retype(payload, BRW_REGISTER_TYPE_UD), addr); + brw_set_dp_read_message(p, insn, 0 /* surface */, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size), GEN7_DATAPORT_DC_OWORD_BLOCK_READ, GEN6_SFID_DATAPORT_CONSTANT_CACHE, 1, /* mlen */ true, /* header */ - 1); /* rlen */ + DIV_ROUND_UP(inst->size_written, REG_SIZE)); brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 855266f..7e00086 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * and we have to split it if necessary. 
*/ const unsigned type_size = type_sz(dest.type); - const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F); + const fs_builder ubld = bld.exec_all().group(4, 0); + const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F); + for (unsigned c = 0; c < instr->num_components;) { const unsigned base = const_offset->u32[0] + c * type_size; @@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const unsigned count = MIN2(instr->num_components - c, (16 - base % 16) / type_size); - bld.exec_all() - .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - packed_consts, surf_index, brw_imm_ud(base & ~15)); + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + packed_consts, surf_index, brw_imm_ud(base & ~15)); const fs_reg consts = retype(byte_offset(packed_consts, base & 15), dest.type); -- 2.10.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev