These need to be emitted as align1 MOV's, since they need to have a stride of 2 on the float register (whether src or dest) so that data from another thread doesn't cross the middle of a SIMD8 register.
v2 (Iago): - The float-to-double needs to align 32-bit data to 64-bit before doing the conversion. This was doable in align16 when we tried to use an execsize of 4, but with an execsize of 8 we would need another align1 opcode to do that (since we need data to cross the middle of a SIMD register). Just making the opcode handle this internally seems more practical that adding another opcode just for this purpose and having the caller know about this before converting. - The double-to-float conversion produces 32-bit elements aligned to 64-bit so we make the opcode re-pack the result to 32-bit and fit in one register, as expected by SIMD4x2 operation. This still requires that callers reserve two registers for the float data destination because we need to produce 64-bit aligned data first, and repack it later on the same destination register, but it saves the need for a re-pack opcode only to achieve this making the operation complete in a single opcode. Hopefully that is worth the weirdness of the double register allocation... Signed-off-by: Connor Abbott <connor.w.abb...@intel.com> Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> --- src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ src/mesa/drivers/dri/i965/brw_shader.cpp | 4 +++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 +++++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 44 ++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index b5a259e..10db5f0 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1094,6 +1094,8 @@ enum opcode { VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, + VEC4_OPCODE_DOUBLE_TO_FLOAT, + VEC4_OPCODE_FLOAT_TO_DOUBLE, FS_OPCODE_DDX_COARSE, FS_OPCODE_DDX_FINE, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index f3b5487..5fb1d68 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -313,6 +313,10 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op) return "pack_bytes"; case VEC4_OPCODE_UNPACK_UNIFORM: return "unpack_uniform"; + case VEC4_OPCODE_DOUBLE_TO_FLOAT: + return "double_to_float"; + case VEC4_OPCODE_FLOAT_TO_DOUBLE: + return "float_to_double"; case FS_OPCODE_DDX_COARSE: return "ddx_coarse"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index bf6701e..d4b1e9c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -243,6 +243,8 @@ vec4_instruction::can_do_writemask(const struct brw_device_info *devinfo) { switch (opcode) { case SHADER_OPCODE_GEN4_SCRATCH_READ: + case VEC4_OPCODE_DOUBLE_TO_FLOAT: + case VEC4_OPCODE_FLOAT_TO_DOUBLE: case VS_OPCODE_PULL_CONSTANT_LOAD: case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: @@ -493,6 +495,12 @@ vec4_visitor::opt_reduce_swizzle() case BRW_OPCODE_DP2: swizzle = brw_swizzle_for_size(2); break; + + case VEC4_OPCODE_FLOAT_TO_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_FLOAT: + swizzle = brw_swizzle_for_size(4); + break; + default: swizzle = brw_swizzle_for_mask(inst->dst.writemask); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 3878c4a..d47b489 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1910,6 +1910,50 @@ generate_code(struct brw_codegen *p, break; } + case VEC4_OPCODE_DOUBLE_TO_FLOAT: { + assert(src[0].type == BRW_REGISTER_TYPE_DF); + assert(dst.type == BRW_REGISTER_TYPE_F); + + brw_set_default_access_mode(p, BRW_ALIGN_1); + + dst.hstride = BRW_HORIZONTAL_STRIDE_2; + dst.width = BRW_WIDTH_4; + src[0].vstride = BRW_VERTICAL_STRIDE_4; + src[0].width = BRW_WIDTH_4; + brw_MOV(p, dst, src[0]); + + struct brw_reg dst_as_src = dst; + dst.hstride = BRW_HORIZONTAL_STRIDE_1; + dst.width = BRW_WIDTH_8; + brw_MOV(p, dst, dst_as_src); + + brw_set_default_access_mode(p, BRW_ALIGN_16); + break; + } + + case VEC4_OPCODE_FLOAT_TO_DOUBLE: { + assert(src[0].type == BRW_REGISTER_TYPE_F); + assert(dst.type == BRW_REGISTER_TYPE_DF); + + brw_set_default_access_mode(p, BRW_ALIGN_1); + + struct brw_reg tmp = retype(dst, src[0].type); + tmp.hstride = BRW_HORIZONTAL_STRIDE_2; + tmp.width = BRW_WIDTH_4; + src[0].vstride = BRW_VERTICAL_STRIDE_4; + src[0].hstride = BRW_HORIZONTAL_STRIDE_1; + src[0].width = BRW_WIDTH_4; + brw_MOV(p, tmp, src[0]); + + tmp.vstride = BRW_VERTICAL_STRIDE_8; + tmp.hstride = BRW_HORIZONTAL_STRIDE_2; + tmp.width = BRW_WIDTH_4; + brw_MOV(p, dst, tmp); + + brw_set_default_access_mode(p, BRW_ALIGN_16); + break; + } + case VEC4_OPCODE_PACK_BYTES: { /* Is effectively: * -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev