On Fri, Feb 27, 2015 at 05:34:48PM +0200, Francisco Jerez wrote: > Change brw_untyped_atomic() and brw_untyped_surface_read() to take the > surface index as a register instead of a constant and to use > brw_send_indirect_message() to emit the indirect variant of send with > a dynamically calculated message descriptor. This will be required to > support variable indexing of image arrays for > ARB_shader_image_load_store. > --- > src/mesa/drivers/dri/i965/brw_eu.h | 10 +- > src/mesa/drivers/dri/i965/brw_eu_emit.c | 158 > +++++++++++++---------- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 +- > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 +- > 4 files changed, 96 insertions(+), 80 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 87a9f3f..9cc9123 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -398,18 +398,18 @@ void brw_CMP(struct brw_compile *p, > > void > brw_untyped_atomic(struct brw_compile *p, > - struct brw_reg dest, > + struct brw_reg dst, > struct brw_reg payload, > + struct brw_reg surface, > unsigned atomic_op, > - unsigned bind_table_index, > unsigned msg_length, > bool response_expected); > > void > brw_untyped_surface_read(struct brw_compile *p, > - struct brw_reg dest, > - struct brw_reg mrf, > - unsigned bind_table_index, > + struct brw_reg dst, > + struct brw_reg payload, > + struct brw_reg surface, > unsigned msg_length, > unsigned num_channels); > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index 0b655d4..34695bf 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -2518,6 +2518,48 @@ brw_send_indirect_message(struct brw_compile *p, > return setup; > } > > +static struct brw_inst * > +brw_send_indirect_surface_message(struct brw_compile *p, > + unsigned sfid, > + struct brw_reg dst, > + struct brw_reg payload, > + struct brw_reg surface, > + unsigned message_len, > + unsigned response_len, > + bool header_present) > +{ > + const struct brw_context *brw = p->brw; > + struct brw_inst *insn; > + > + if (surface.file != BRW_IMMEDIATE_VALUE) { > + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); > + > + brw_push_insn_state(p); > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); > + > + /* Mask out invalid bits from the surface index to avoid hangs e.g. > when > + * some surface array is accessed out of bounds. > + */ > + insn = brw_AND(p, addr, > + suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), > + BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)), > + brw_imm_ud(0xff)); > + > + brw_pop_insn_state(p); > + > + surface = addr; > + } > + > + insn = brw_send_indirect_message(p, sfid, dst, payload, surface); > + brw_inst_set_mlen(brw, insn, message_len); > + brw_inst_set_rlen(brw, insn, response_len); > + brw_inst_set_header_present(brw, insn, header_present); > + > + return insn; > +} > + > static int > brw_find_next_block_end(struct brw_compile *p, int start_offset) > { > @@ -2747,25 +2789,16 @@ static void > brw_set_dp_untyped_atomic_message(struct brw_compile *p, > brw_inst *insn, > unsigned atomic_op, > - unsigned bind_table_index, > - unsigned msg_length, > - unsigned response_length, > - bool header_present) > + bool response_expected) > { > const struct brw_context *brw = p->brw; > - > unsigned msg_control = > atomic_op | /* Atomic Operation Type: BRW_AOP_* */ > - (response_length ? 1 << 5 : 0); /* Return data expected */ > + (response_expected ? 1 << 5 : 0); /* Return data expected */ > > if (brw->gen >= 8 || brw->is_haswell) { > - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, > - msg_length, response_length, > - header_present, false); > - > - > - if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) { > - if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16) > + if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) { > + if (!p->compressed) > msg_control |= 1 << 4; /* SIMD8 mode */ > > brw_inst_set_dp_msg_type(brw, insn, > @@ -2775,30 +2808,28 @@ brw_set_dp_untyped_atomic_message(struct brw_compile > *p, > HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2); > } > } else { > - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, > - msg_length, response_length, > - header_present, false); > - > brw_inst_set_dp_msg_type(brw, insn, > GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); > > - if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16) > + if (!p->compressed) > msg_control |= 1 << 4; /* SIMD8 mode */ > } > > - brw_inst_set_binding_table_index(brw, insn, bind_table_index); > brw_inst_set_dp_msg_control(brw, insn, msg_control); > } > > void > brw_untyped_atomic(struct brw_compile *p, > - struct brw_reg dest, > + struct brw_reg dst, > struct brw_reg payload, > + struct brw_reg surface, > unsigned atomic_op, > - unsigned bind_table_index, > unsigned msg_length, > bool response_expected) > { > const struct brw_context *brw = p->brw; > + const unsigned sfid = (brw->gen >= 8 || brw->is_haswell ? > + HSW_SFID_DATAPORT_DATA_CACHE_1 : > + GEN7_SFID_DATAPORT_DATA_CACHE); > const bool align1 = (brw_inst_access_mode(brw, p->current) == > BRW_ALIGN_1); > /* Mask out unused components -- This is especially important in Align16 > * mode on generations that don't have native support for SIMD4x2 atomics, > @@ -2807,79 +2838,59 @@ brw_untyped_atomic(struct brw_compile *p, > * uninitialized Y, Z and W coordinates of the payload. > */ > const unsigned mask = (align1 ? WRITEMASK_XYZW : WRITEMASK_X); > - brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); > - > - brw_set_dest(p, insn, retype(brw_writemask(dest, mask), > - BRW_REGISTER_TYPE_UD)); > - brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD)); > - brw_set_src1(p, insn, brw_imm_d(0)); > - brw_set_dp_untyped_atomic_message( > - p, insn, atomic_op, bind_table_index, msg_length, > + struct brw_inst *insn = brw_send_indirect_surface_message( > + p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, > brw_surface_payload_size(p, response_expected, > brw->gen >= 8 || brw->is_haswell, true), > align1); > + > + brw_set_dp_untyped_atomic_message( > + p, insn, atomic_op, response_expected); > } > > static void > brw_set_dp_untyped_surface_read_message(struct brw_compile *p, > - brw_inst *insn, > - unsigned bind_table_index, > - unsigned msg_length, > - unsigned response_length, > - unsigned num_channels, > - bool header_present) > + struct brw_inst *insn, > + unsigned num_channels) > { > const struct brw_context *brw = p->brw; > - const unsigned dispatch_width = > - (brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16 ? 16 : 8); > - > - if (brw->gen >= 8 || brw->is_haswell) { > - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, > - msg_length, response_length, > - header_present, false); > - > - brw_inst_set_dp_msg_type(brw, insn, > - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ); > - } else { > - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, > - msg_length, response_length, > - header_present, false); > - > - brw_inst_set_dp_msg_type(brw, insn, > - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ); > - } > - > /* Set mask of 32-bit channels to drop. */ > unsigned msg_control = (0xf & (0xf << num_channels)); > > - if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) { > - if (dispatch_width == 16) > + if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) { > + if (p->compressed) > msg_control |= 1 << 4; /* SIMD16 mode */ > else > msg_control |= 2 << 4; /* SIMD8 mode */ > } > > - brw_inst_set_binding_table_index(brw, insn, bind_table_index); > + brw_inst_set_dp_msg_type(brw, insn, > + (brw->gen >= 8 || brw->is_haswell ? > + HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : > + GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ)); > brw_inst_set_dp_msg_control(brw, insn, msg_control); > } > > void > brw_untyped_surface_read(struct brw_compile *p, > - struct brw_reg dest, > - struct brw_reg mrf, > - unsigned bind_table_index, > + struct brw_reg dst, > + struct brw_reg payload, > + struct brw_reg surface, > unsigned msg_length, > unsigned num_channels) > { > const struct brw_context *brw = p->brw; > - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); > + const unsigned sfid = (brw->gen >= 8 || p->brw->is_haswell ? > + HSW_SFID_DATAPORT_DATA_CACHE_1 : > + GEN7_SFID_DATAPORT_DATA_CACHE); > + const bool align1 = (brw_inst_access_mode(brw, p->current) == > BRW_ALIGN_1); > + struct brw_inst *insn = brw_send_indirect_surface_message( > + p, sfid, dst, payload, surface, msg_length, > + brw_surface_payload_size(p, num_channels, true, true), > + align1); > > - brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD)); > - brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD)); > brw_set_dp_untyped_surface_read_message( > - p, insn, bind_table_index, msg_length, > - brw_surface_payload_size(p, num_channels, true, true), > - num_channels, brw_inst_access_mode(brw, insn) == BRW_ALIGN_1); > + p, insn, num_channels); > } > > void > @@ -3080,13 +3091,16 @@ void brw_shader_time_add(struct brw_compile *p, > struct brw_reg payload, > uint32_t surf_index) > { > + const unsigned sfid = (p->brw->gen >= 8 || p->brw->is_haswell ? > + HSW_SFID_DATAPORT_DATA_CACHE_1 : > + GEN7_SFID_DATAPORT_DATA_CACHE); > assert(p->brw->gen >= 7); > > brw_push_insn_state(p); > brw_set_default_access_mode(p, BRW_ALIGN_1); > brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
You changed brw_set_dp_untyped_atomic_message() to consider "p->compressed" instead of "brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16". Here you, however, force compression contol off meaning that only one branch is taken in brw_set_dp_untyped_atomic_message(). Before both branches were possible, weren't they? > brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); > - brw_pop_insn_state(p); > > /* We use brw_vec1_reg and unmasked because we want to increment the given > * offset only once. > @@ -3095,8 +3109,10 @@ void brw_shader_time_add(struct brw_compile *p, > BRW_ARF_NULL, 0)); > brw_set_src0(p, send, brw_vec1_reg(payload.file, > payload.nr, 0)); > - brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index, > - 2 /* message length */, > - 0 /* response length */, > - false /* header present */); > + brw_set_src1(p, send, brw_imm_ud(0)); > + brw_set_message_descriptor(p, send, sfid, 2, 0, false, false); > + brw_inst_set_binding_table_index(p->brw, send, surf_index); > + brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false); > + > + brw_pop_insn_state(p); > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 2fdc3df..f739837 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1429,7 +1429,7 @@ fs_generator::generate_untyped_atomic(fs_inst *inst, > struct brw_reg dst, > surf_index.type == BRW_REGISTER_TYPE_UD); > > brw_untyped_atomic(p, dst, payload, > - atomic_op.dw1.ud, surf_index.dw1.ud, > + surf_index, atomic_op.dw1.ud, > inst->mlen, true); > > brw_mark_surface_used(prog_data, surf_index.dw1.ud); > @@ -1443,7 +1443,7 @@ fs_generator::generate_untyped_surface_read(fs_inst > *inst, struct brw_reg dst, > assert(surf_index.file == BRW_IMMEDIATE_VALUE && > surf_index.type == BRW_REGISTER_TYPE_UD); > > - brw_untyped_surface_read(p, dst, payload, surf_index.dw1.ud, inst->mlen, > 1); > + brw_untyped_surface_read(p, dst, payload, surf_index, inst->mlen, 1); > > brw_mark_surface_used(prog_data, surf_index.dw1.ud); > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 467da2d..00886c5 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1116,7 +1116,7 @@ > vec4_generator::generate_untyped_atomic(vec4_instruction *inst, > surf_index.type == BRW_REGISTER_TYPE_UD); > > brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf), > - atomic_op.dw1.ud, surf_index.dw1.ud, > + surf_index, atomic_op.dw1.ud, > inst->mlen, true); > > brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); > @@ -1131,7 +1131,7 @@ > vec4_generator::generate_untyped_surface_read(vec4_instruction *inst, > surf_index.type == BRW_REGISTER_TYPE_UD); > > brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf), > - surf_index.dw1.ud, inst->mlen, 1); > + surf_index, inst->mlen, 1); > > brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); > } > -- > 2.1.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev