From: Nicolai Hähnle <nicolai.haeh...@amd.com>

It seems clear that trying to multiply two pairs of doubles would
result in the temporary register getting overwritten by the second
pair. So make the code more explicit.
---
 src/gallium/drivers/r600/r600_shader.c | 36 ++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ebe2744..7d1452a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4319,39 +4319,41 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int i, j, k, r; struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); int t1 = ctx->temp_reg; - for (k = 0; k < 2; k++) { - if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) - continue; + /* t1 would get overwritten below if we actually tried to + * multiply two pairs of doubles at a time. */ + assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || + inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); - for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ctx->inst_info->op; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); - } - alu.dst.sel = t1; - alu.dst.chan = i; - alu.dst.write = 1; - if (i == 3) - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ctx->inst_info->op; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1)); } + alu.dst.sel = t1; + alu.dst.chan = i; + alu.dst.write = 1; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; } for (i = 0; i <= lasti; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = t1; alu.src[0].chan = i; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev