Piglit quick tests including sqrt pass, no other regressions,
tested on radeon 6670.
---
Should be slightly more precise than the invsqrt/recip/mul combination
used previously, I reckon up to about 2 bits of mantissa, and saves
two instructions per sqrt emitted.

It would be good if someone could test this on Cayman since it uses
a slightly different codepath.

 src/gallium/drivers/r600/r600_pipe.c   | 2 +-
 src/gallium/drivers/r600/r600_shader.c | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5bf9c00..ee6a416 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
                return 1;
        case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-               return 0;
+               return 1;
        case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
        case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
        case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..907547d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
        {TGSI_OPCODE_CND,       0, ALU_OP0_NOP, tgsi_unsupported},
-       /* gap */
-       {20,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        /* gap */
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
        {TGSI_OPCODE_CND,       0, ALU_OP0_NOP, tgsi_unsupported},
-       /* gap */
-       {20,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        /* gap */
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_SUB,       0, ALU_OP2_ADD, tgsi_op2},
        {TGSI_OPCODE_LRP,       0, ALU_OP0_NOP, tgsi_lrp},
        {TGSI_OPCODE_CND,       0, ALU_OP0_NOP, tgsi_unsupported},
-       /* gap */
-       {20,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_SQRT,      0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
        {TGSI_OPCODE_DP2A,      0, ALU_OP0_NOP, tgsi_unsupported},
        /* gap */
        {22,                    0, ALU_OP0_NOP, tgsi_unsupported},
-- 
1.8.3.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to