For a register source/destination of an instruction, the function returns the read/write byte pattern of a 32-byte register as an unsigned int.
The returned pattern takes into account the exec_size of the instruction, the type bitsize, the stride and whether the register is a source or a destination. The objective of the function is to help determine which bytes an instruction reads/writes, in order to improve the liveness analysis for partial reads/writes. We manage special cases for SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL and SHADER_OPCODE_BYTE_SCATTERED_WRITE because, depending on the bitsize parameter, they have a different read pattern. --- src/intel/compiler/brw_fs.cpp | 183 +++++++++++++++++++++++++++++++++ src/intel/compiler/brw_ir_fs.h | 1 + 2 files changed, 184 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2b8363ca362..f3045c4ff6c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -687,6 +687,189 @@ fs_inst::is_partial_write() const this->dst.offset % REG_SIZE != 0); } +/** + * Returns a 32-bit uint whose bits represent if the associated register byte + * has been read/written by the instruction. The returned pattern takes into + * account the exec_size of the instruction, the type bitsize, the register + * stride and whether the register is a source or a destination. + * + * The objective of this function is to identify which parts of the register + * are read or written for operations that don't read/write a full register. + * This lets live range variable analysis identify whether a partial write has + * completely defined the part of the register used by a partial read, so we + * avoid extending the liveness range when all the data read was already + * defined although the register wasn't completely written. 
+ */ +unsigned +fs_inst::register_byte_use_pattern(const fs_reg &r, boolean is_dst) const +{ + if (is_dst) { + /* We don't know what is written so we return the worst case */ + if (this->predicate && this->opcode != BRW_OPCODE_SEL) + return 0; + /* We assume that send destinations are completely written */ + if (this->is_send_from_grf()) + return ~0u; + } else { + /* byte_scattered_write_logical pattern of src[1] is 32-bit aligned + * so the read pattern depends on the bitsize stored at src[4] + */ + if (this->opcode == SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL && + this->src[1].nr == r.nr) { + switch (this->src[4].ud) { + case 32: + return ~0u; + case 16: + return 0x33333333; + case 8: + return 0x11111111; + default: + unreachable("Unsupported bitsize at byte_scattered_write_logical"); + } + } + /* As for byte_scattered_write_logical but we need to take into account + * that the data written is at payload offset 32 with SIMD8 and offset + * 64 with SIMD16. + */ + if (this->opcode == SHADER_OPCODE_BYTE_SCATTERED_WRITE && + this->src[0].nr == r.nr) { + fs_reg payload = this->src[0]; + payload.offset = REG_SIZE * this->exec_size / 8; + if (regions_overlap(r, REG_SIZE, + payload, REG_SIZE * this->exec_size / 8)) { + switch (this->src[2].ud) { + case 32: + return ~0u; + case 16: + return 0x33333333; + case 8: + return 0x11111111; + default: + unreachable("Unsupported bitsize at byte_scattered_write"); + } + } else { + return ~0u; + } + } + } + + /* We define the most conservative value in order to calculate liveness + * range. If it is a destination nothing is defined and if it is a source + * all the bytes of the register could be read. So for release builds + * the unreachables would always have a safe return value. */ + unsigned pattern = is_dst ? 0 : ~0u; + + /* In the general case we calculate the pattern for a specific register + * based on the type size and stride. 
We calculate the SIMD8 pattern + * and then we adjust the pattern if needed for different exec_sizes + * and offset + */ + switch (type_sz(r.type)){ + case 1: + switch (r.stride) { + case 0: + pattern = 0X1; + break; + case 1: + pattern = 0xff; + break; + case 2: + pattern = 0x5555; + break; + case 4: + pattern = 0x11111111; + break; + case 8: + pattern = 0x01010101; + break; + default: + unreachable("Unknown pattern unsupported 8-bit stride"); + } + break; + case 2: + switch (r.stride) { + case 0: + pattern = 0X3; + break; + case 1: + pattern = 0xffff; + break; + case 2: + pattern = 0x33333333; + break; + case 4: + pattern = 0x03030303; + break; + case 8: + pattern = 0x00030003; + break; + default: + unreachable("Unknown pattern unsupported 16-bit stride"); + } + break; + case 4: + switch (r.stride) { + case 0: + pattern = 0Xf; + break; + case 1: + pattern = ~0u; + break; + case 2: + pattern = 0x0f0f0f0f; + break; + case 4: + pattern = 0x000f000f; + break; + default: + unreachable("Unknown pattern unsupported 32-bit stride"); + } + break; + case 8: + switch (r.stride) { + case 0: + pattern = 0Xff; + break; + case 1: + pattern = ~0u; + break; + case 2: + pattern = 0x00ff00ff; + break; + case 4: + pattern = 0xff; + break; + default: + unreachable("Unknown pattern unsupported 64-bit stride"); + } + break; + default: + unreachable("Unknown pattern for unsupported bitsize "); + } + + if (this->exec_size > 8 && r.stride * type_sz(r.type) * 8 < REG_SIZE) { + /* For exec_size greater than SIMD8 we repeat the pattern unless it + * already represents a full register */ + pattern = pattern | (pattern << (8 * r.stride * type_sz(r.type))); + if (this->exec_size > 16 && r.stride * type_sz(r.type) * 16 < REG_SIZE) + pattern = pattern | (pattern << (16 * r.stride * type_sz(r.type))); + } else if (this->exec_size < 8 && + r.stride * type_sz(r.type) * this->exec_size < REG_SIZE) { + /* For exec_size smaller than SIMD8 we reduce the pattern if its size + * is 
smaller than a full register. */ + pattern = pattern >> (MIN2(REG_SIZE, 8 * type_sz(r.type) * r.stride) - + this->exec_size * type_sz(r.type) * r.stride); + } + + /* We adjust the pattern to the byte offset of the register */ + pattern = pattern << (r.offset % REG_SIZE); + + assert(pattern); + + return pattern; +} + + unsigned fs_inst::components_read(unsigned i) const { diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 92dad269a34..5ea6294b8ad 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -350,6 +350,7 @@ public: bool equals(fs_inst *inst) const; bool is_send_from_grf() const; bool is_partial_write() const; + unsigned register_byte_use_pattern(const fs_reg &r, boolean is_dst) const; bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; unsigned size_read(int arg) const; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev