[Mesa-dev] [PATCH v3 23/43] i965/fs: Add byte scattered read message and fs support
--- src/intel/compiler/brw_eu.h| 7 + src/intel/compiler/brw_eu_defines.h| 2 ++ src/intel/compiler/brw_eu_emit.c | 41 ++ src/intel/compiler/brw_fs.cpp | 10 +++ src/intel/compiler/brw_fs_copy_propagation.cpp | 2 ++ src/intel/compiler/brw_fs_generator.cpp| 5 src/intel/compiler/brw_fs_surface_builder.cpp | 12 src/intel/compiler/brw_fs_surface_builder.h| 5 src/intel/compiler/brw_shader.cpp | 6 9 files changed, 90 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index b44ca0f518..ca1ff21a83 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -476,6 +476,13 @@ brw_typed_surface_write(struct brw_codegen *p, unsigned num_channels); void +brw_byte_scattered_read(struct brw_codegen *p, +struct brw_reg dst, +struct brw_reg payload, +struct brw_reg surface, +unsigned msg_length); + +void brw_byte_scattered_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 9aac385ba7..c5dc5fd5fb 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -397,6 +397,8 @@ enum opcode { * opcode, but instead of taking a single payload blog they expect their * arguments separately as individual sources, like untyped write/read. */ + SHADER_OPCODE_BYTE_SCATTERED_READ, + SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, SHADER_OPCODE_BYTE_SCATTERED_WRITE, SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 84d85be653..8c83d8b500 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2929,6 +2929,47 @@ brw_untyped_surface_write(struct brw_codegen *p, p, insn, num_channels); } + + +static void +brw_set_dp_byte_scattered_read_message(struct brw_codegen *p, + struct brw_inst *insn) +{ + + const struct gen_device_info *devinfo = p->devinfo; + /* Set mask of 32-bit channels to drop. 
*/ + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; + + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) + msg_control |= 1; /* SIMD16 mode */ + else + msg_control |= 2; /* SIMD8 mode */ + } + + brw_inst_set_dp_msg_type(devinfo, insn, +(devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ : + GEN7_DATAPORT_DC_BYTE_SCATTERED_READ)); + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); +} + +void +brw_byte_scattered_read(struct brw_codegen *p, +struct brw_reg dst, +struct brw_reg payload, +struct brw_reg surface, +unsigned msg_length) +{ + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + struct brw_inst *insn = brw_send_indirect_scattered_message( + p, sfid, dst, payload, surface, msg_length, + brw_surface_payload_size(p, 1, true, true), + false); + + brw_set_dp_byte_scattered_read_message(p, insn); +} + static void brw_set_dp_byte_scattered_write(struct brw_codegen *p, struct brw_inst *insn) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index e4a94ff053..bd0d32b741 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: @@ -733,6 +734,7 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: assert(src[3].file == IMM); /* Surface coordinates. 
*/ if (i == 0) @@ -800,6 +802,7 @@ fs_inst::size_read(int arg) const case SHADER_OPCODE_TYPED_SURFACE_WRITE: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_BYTE_SCATTERED_READ: if (arg == 0) return mlen * REG_SIZE; break; @@ -4527,6 +4530,12 @@ fs_visitor::lower_logical_sends() ibld.sample_mas
Re: [Mesa-dev] [PATCH v3 23/43] i965/fs: Add byte scattered read message and fs support
On Thu, Oct 12, 2017 at 08:38:12PM +0200, Jose Maria Casanova Crespo wrote: > --- > src/intel/compiler/brw_eu.h| 7 + > src/intel/compiler/brw_eu_defines.h| 2 ++ > src/intel/compiler/brw_eu_emit.c | 41 > ++ > src/intel/compiler/brw_fs.cpp | 10 +++ > src/intel/compiler/brw_fs_copy_propagation.cpp | 2 ++ > src/intel/compiler/brw_fs_generator.cpp| 5 > src/intel/compiler/brw_fs_surface_builder.cpp | 12 > src/intel/compiler/brw_fs_surface_builder.h| 5 > src/intel/compiler/brw_shader.cpp | 6 > 9 files changed, 90 insertions(+) > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index b44ca0f518..ca1ff21a83 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -476,6 +476,13 @@ brw_typed_surface_write(struct brw_codegen *p, > unsigned num_channels); > > void > +brw_byte_scattered_read(struct brw_codegen *p, > +struct brw_reg dst, > +struct brw_reg payload, > +struct brw_reg surface, > +unsigned msg_length); > + > +void > brw_byte_scattered_write(struct brw_codegen *p, > struct brw_reg payload, > struct brw_reg surface, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 9aac385ba7..c5dc5fd5fb 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -397,6 +397,8 @@ enum opcode { > * opcode, but instead of taking a single payload blog they expect their > * arguments separately as individual sources, like untyped write/read. 
> */ > + SHADER_OPCODE_BYTE_SCATTERED_READ, > + SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, > SHADER_OPCODE_BYTE_SCATTERED_WRITE, > SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, > > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index 84d85be653..8c83d8b500 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -2929,6 +2929,47 @@ brw_untyped_surface_write(struct brw_codegen *p, >p, insn, num_channels); > } > > + > + > +static void > +brw_set_dp_byte_scattered_read_message(struct brw_codegen *p, > + struct brw_inst *insn) > +{ > + > + const struct gen_device_info *devinfo = p->devinfo; > + /* Set mask of 32-bit channels to drop. */ > + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; > + > + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { > + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) > + msg_control |= 1; /* SIMD16 mode */ > + else > + msg_control |= 2; /* SIMD8 mode */ > + } > + > + brw_inst_set_dp_msg_type(devinfo, insn, > +(devinfo->gen >= 8 || devinfo->is_haswell ? 
> + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ : > + GEN7_DATAPORT_DC_BYTE_SCATTERED_READ)); > + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); > +} > + > +void > +brw_byte_scattered_read(struct brw_codegen *p, > +struct brw_reg dst, > +struct brw_reg payload, > +struct brw_reg surface, > +unsigned msg_length) > +{ > + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; > + struct brw_inst *insn = brw_send_indirect_scattered_message( > + p, sfid, dst, payload, surface, msg_length, > + brw_surface_payload_size(p, 1, true, true), > + false); > + > + brw_set_dp_byte_scattered_read_message(p, insn); > +} > + > static void > brw_set_dp_byte_scattered_write(struct brw_codegen *p, > struct brw_inst *insn) > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index e4a94ff053..bd0d32b741 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const > case SHADER_OPCODE_UNTYPED_SURFACE_READ: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_READ: > case SHADER_OPCODE_TYPED_ATOMIC: > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > @@ -733,6 +734,7 @@ fs_inst::components_read(unsigned i) const > > case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: > + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: >assert(src[3].file == IMM); >/* Surface coordinates. */ >if (i == 0) > @@ -800,6 +802,7 @@ fs_inst::size_read(int arg) const > case SHADER_OPCODE_TYPED_SURFACE_WRITE:
Re: [Mesa-dev] [PATCH v3 23/43] i965/fs: Add byte scattered read message and fs support
On 15/10/17 11:47, Pohjolainen, Topi wrote: > On Thu, Oct 12, 2017 at 08:38:12PM +0200, Jose Maria Casanova Crespo wrote: >> --- >> src/intel/compiler/brw_eu.h| 7 + >> src/intel/compiler/brw_eu_defines.h| 2 ++ >> src/intel/compiler/brw_eu_emit.c | 41 >> ++ >> src/intel/compiler/brw_fs.cpp | 10 +++ >> src/intel/compiler/brw_fs_copy_propagation.cpp | 2 ++ >> src/intel/compiler/brw_fs_generator.cpp| 5 >> src/intel/compiler/brw_fs_surface_builder.cpp | 12 >> src/intel/compiler/brw_fs_surface_builder.h| 5 >> src/intel/compiler/brw_shader.cpp | 6 >> 9 files changed, 90 insertions(+) >> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >> index b44ca0f518..ca1ff21a83 100644 >> --- a/src/intel/compiler/brw_eu.h >> +++ b/src/intel/compiler/brw_eu.h >> @@ -476,6 +476,13 @@ brw_typed_surface_write(struct brw_codegen *p, >> unsigned num_channels); >> >> void >> +brw_byte_scattered_read(struct brw_codegen *p, >> +struct brw_reg dst, >> +struct brw_reg payload, >> +struct brw_reg surface, >> +unsigned msg_length); >> + >> +void >> brw_byte_scattered_write(struct brw_codegen *p, >> struct brw_reg payload, >> struct brw_reg surface, >> diff --git a/src/intel/compiler/brw_eu_defines.h >> b/src/intel/compiler/brw_eu_defines.h >> index 9aac385ba7..c5dc5fd5fb 100644 >> --- a/src/intel/compiler/brw_eu_defines.h >> +++ b/src/intel/compiler/brw_eu_defines.h >> @@ -397,6 +397,8 @@ enum opcode { >> * opcode, but instead of taking a single payload blog they expect their >> * arguments separately as individual sources, like untyped write/read. 
>> */ >> + SHADER_OPCODE_BYTE_SCATTERED_READ, >> + SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, >> SHADER_OPCODE_BYTE_SCATTERED_WRITE, >> SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, >> >> diff --git a/src/intel/compiler/brw_eu_emit.c >> b/src/intel/compiler/brw_eu_emit.c >> index 84d85be653..8c83d8b500 100644 >> --- a/src/intel/compiler/brw_eu_emit.c >> +++ b/src/intel/compiler/brw_eu_emit.c >> @@ -2929,6 +2929,47 @@ brw_untyped_surface_write(struct brw_codegen *p, >>p, insn, num_channels); >> } >> >> + >> + >> +static void >> +brw_set_dp_byte_scattered_read_message(struct brw_codegen *p, >> + struct brw_inst *insn) >> +{ >> + >> + const struct gen_device_info *devinfo = p->devinfo; >> + /* Set mask of 32-bit channels to drop. */ >> + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; >> + >> + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { >> + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) >> + msg_control |= 1; /* SIMD16 mode */ >> + else >> + msg_control |= 2; /* SIMD8 mode */ >> + } >> + >> + brw_inst_set_dp_msg_type(devinfo, insn, >> +(devinfo->gen >= 8 || devinfo->is_haswell ? 
>> + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ : >> + GEN7_DATAPORT_DC_BYTE_SCATTERED_READ)); >> + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); >> +} >> + >> +void >> +brw_byte_scattered_read(struct brw_codegen *p, >> +struct brw_reg dst, >> +struct brw_reg payload, >> +struct brw_reg surface, >> +unsigned msg_length) >> +{ >> + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; >> + struct brw_inst *insn = brw_send_indirect_scattered_message( >> + p, sfid, dst, payload, surface, msg_length, >> + brw_surface_payload_size(p, 1, true, true), >> + false); >> + >> + brw_set_dp_byte_scattered_read_message(p, insn); >> +} >> + >> static void >> brw_set_dp_byte_scattered_write(struct brw_codegen *p, >> struct brw_inst *insn) >> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp >> index e4a94ff053..bd0d32b741 100644 >> --- a/src/intel/compiler/brw_fs.cpp >> +++ b/src/intel/compiler/brw_fs.cpp >> @@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const >> case SHADER_OPCODE_UNTYPED_SURFACE_READ: >> case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: >> case SHADER_OPCODE_BYTE_SCATTERED_WRITE: >> + case SHADER_OPCODE_BYTE_SCATTERED_READ: >> case SHADER_OPCODE_TYPED_ATOMIC: >> case SHADER_OPCODE_TYPED_SURFACE_READ: >> case SHADER_OPCODE_TYPED_SURFACE_WRITE: >> @@ -733,6 +734,7 @@ fs_inst::components_read(unsigned i) const >> >> case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: >> case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: >> + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: >>assert(src[3].file == IM
Re: [Mesa-dev] [PATCH v3 23/43] i965/fs: Add byte scattered read message and fs support
On Thu, Oct 12, 2017 at 11:38 AM, Jose Maria Casanova Crespo < jmcasan...@igalia.com> wrote: > --- > src/intel/compiler/brw_eu.h| 7 + > src/intel/compiler/brw_eu_defines.h| 2 ++ > src/intel/compiler/brw_eu_emit.c | 41 > ++ > src/intel/compiler/brw_fs.cpp | 10 +++ > src/intel/compiler/brw_fs_copy_propagation.cpp | 2 ++ > src/intel/compiler/brw_fs_generator.cpp| 5 > src/intel/compiler/brw_fs_surface_builder.cpp | 12 > src/intel/compiler/brw_fs_surface_builder.h| 5 > src/intel/compiler/brw_shader.cpp | 6 > 9 files changed, 90 insertions(+) > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index b44ca0f518..ca1ff21a83 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -476,6 +476,13 @@ brw_typed_surface_write(struct brw_codegen *p, > unsigned num_channels); > > void > +brw_byte_scattered_read(struct brw_codegen *p, > +struct brw_reg dst, > +struct brw_reg payload, > +struct brw_reg surface, > +unsigned msg_length); > + > +void > brw_byte_scattered_write(struct brw_codegen *p, > struct brw_reg payload, > struct brw_reg surface, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 9aac385ba7..c5dc5fd5fb 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -397,6 +397,8 @@ enum opcode { > * opcode, but instead of taking a single payload blog they expect > their > * arguments separately as individual sources, like untyped write/read. 
> */ > + SHADER_OPCODE_BYTE_SCATTERED_READ, > + SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, > SHADER_OPCODE_BYTE_SCATTERED_WRITE, > SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, > > diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c > index 84d85be653..8c83d8b500 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -2929,6 +2929,47 @@ brw_untyped_surface_write(struct brw_codegen *p, >p, insn, num_channels); > } > > + > + > +static void > +brw_set_dp_byte_scattered_read_message(struct brw_codegen *p, > + struct brw_inst *insn) > +{ > + > + const struct gen_device_info *devinfo = p->devinfo; > + /* Set mask of 32-bit channels to drop. */ > + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; > + > + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { > + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) > + msg_control |= 1; /* SIMD16 mode */ > + else > + msg_control |= 2; /* SIMD8 mode */ > Is this really supposed to be 2? From my reading of the BDW docs, it looks like you want 1 and 0. 2 gives you 0 in the SIMD mode bit (which indicates SIMD8) and a 1 in a reserved MBZ bit. > + } > I think we probably want an else unreachable here. Also, we want that for scattered_write. > + > + brw_inst_set_dp_msg_type(devinfo, insn, > +(devinfo->gen >= 8 || devinfo->is_haswell ? 
> + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ : > + GEN7_DATAPORT_DC_BYTE_SCATTERED_READ)); > + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); > +} > + > +void > +brw_byte_scattered_read(struct brw_codegen *p, > +struct brw_reg dst, > +struct brw_reg payload, > +struct brw_reg surface, > +unsigned msg_length) > +{ > + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; > + struct brw_inst *insn = brw_send_indirect_scattered_message( > + p, sfid, dst, payload, surface, msg_length, > + brw_surface_payload_size(p, 1, true, true), > + false); > + > + brw_set_dp_byte_scattered_read_message(p, insn); > Again, I think we can inline this > +} > + > static void > brw_set_dp_byte_scattered_write(struct brw_codegen *p, > struct brw_inst *insn) > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index e4a94ff053..bd0d32b741 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const > case SHADER_OPCODE_UNTYPED_SURFACE_READ: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_READ: > case SHADER_OPCODE_TYPED_ATOMIC: > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > @@ -733,6 +734,7 @@ fs_inst::components_read(unsigned i) const > > case SHADER_
Re: [Mesa-dev] [PATCH v3 23/43] i965/fs: Add byte scattered read message and fs support
On Wed, Nov 1, 2017 at 12:09 PM, Jason Ekstrand wrote: > On Thu, Oct 12, 2017 at 11:38 AM, Jose Maria Casanova Crespo < > jmcasan...@igalia.com> wrote: > >> --- >> src/intel/compiler/brw_eu.h| 7 + >> src/intel/compiler/brw_eu_defines.h| 2 ++ >> src/intel/compiler/brw_eu_emit.c | 41 >> ++ >> src/intel/compiler/brw_fs.cpp | 10 +++ >> src/intel/compiler/brw_fs_copy_propagation.cpp | 2 ++ >> src/intel/compiler/brw_fs_generator.cpp| 5 >> src/intel/compiler/brw_fs_surface_builder.cpp | 12 >> src/intel/compiler/brw_fs_surface_builder.h| 5 >> src/intel/compiler/brw_shader.cpp | 6 >> 9 files changed, 90 insertions(+) >> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >> index b44ca0f518..ca1ff21a83 100644 >> --- a/src/intel/compiler/brw_eu.h >> +++ b/src/intel/compiler/brw_eu.h >> @@ -476,6 +476,13 @@ brw_typed_surface_write(struct brw_codegen *p, >> unsigned num_channels); >> >> void >> +brw_byte_scattered_read(struct brw_codegen *p, >> +struct brw_reg dst, >> +struct brw_reg payload, >> +struct brw_reg surface, >> +unsigned msg_length); >> + >> +void >> brw_byte_scattered_write(struct brw_codegen *p, >> struct brw_reg payload, >> struct brw_reg surface, >> diff --git a/src/intel/compiler/brw_eu_defines.h >> b/src/intel/compiler/brw_eu_defines.h >> index 9aac385ba7..c5dc5fd5fb 100644 >> --- a/src/intel/compiler/brw_eu_defines.h >> +++ b/src/intel/compiler/brw_eu_defines.h >> @@ -397,6 +397,8 @@ enum opcode { >> * opcode, but instead of taking a single payload blog they expect >> their >> * arguments separately as individual sources, like untyped >> write/read. 
>> */ >> + SHADER_OPCODE_BYTE_SCATTERED_READ, >> + SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL, >> SHADER_OPCODE_BYTE_SCATTERED_WRITE, >> SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, >> >> diff --git a/src/intel/compiler/brw_eu_emit.c >> b/src/intel/compiler/brw_eu_emit.c >> index 84d85be653..8c83d8b500 100644 >> --- a/src/intel/compiler/brw_eu_emit.c >> +++ b/src/intel/compiler/brw_eu_emit.c >> @@ -2929,6 +2929,47 @@ brw_untyped_surface_write(struct brw_codegen *p, >>p, insn, num_channels); >> } >> >> + >> + >> +static void >> +brw_set_dp_byte_scattered_read_message(struct brw_codegen *p, >> + struct brw_inst *insn) >> +{ >> + >> + const struct gen_device_info *devinfo = p->devinfo; >> + /* Set mask of 32-bit channels to drop. */ >> + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; >> > As I commented before, I don't think we want to make this assumption. > + >> + if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { >> + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) >> + msg_control |= 1; /* SIMD16 mode */ >> + else >> + msg_control |= 2; /* SIMD8 mode */ >> > > Is this really supposed to be 2? From my reading of the BDW docs, it > looks like you want 1 and 0. 2 gives you 0 in the SIMD mode bit (which > indicates SIMD8) and a 1 in a reserved MBZ bit. > > >> + } >> > > I think we probably want an else unreachable here. Also, we want that for > scattered_write. > > >> + >> + brw_inst_set_dp_msg_type(devinfo, insn, >> +(devinfo->gen >= 8 || devinfo->is_haswell ? 
>> + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ : >> + GEN7_DATAPORT_DC_BYTE_SCATTERED_READ)); >> + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); >> +} >> + >> +void >> +brw_byte_scattered_read(struct brw_codegen *p, >> +struct brw_reg dst, >> +struct brw_reg payload, >> +struct brw_reg surface, >> +unsigned msg_length) >> +{ >> + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; >> + struct brw_inst *insn = brw_send_indirect_scattered_message( >> + p, sfid, dst, payload, surface, msg_length, >> + brw_surface_payload_size(p, 1, true, true), >> + false); >> + >> + brw_set_dp_byte_scattered_read_message(p, insn); >> > > Again, I think we can inline this > > >> +} >> + >> static void >> brw_set_dp_byte_scattered_write(struct brw_codegen *p, >> struct brw_inst *insn) >> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cp >> p >> index e4a94ff053..bd0d32b741 100644 >> --- a/src/intel/compiler/brw_fs.cpp >> +++ b/src/intel/compiler/brw_fs.cpp >> @@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const >> case SHADER_OPCODE_UNTYPED_SURFACE_READ: >> case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: >> case SHADER_OPCODE_BYTE_SCA