These patches add support for double-precision opcodes, as discussed on the mailing list. The opcodes created are: movd, ddiv, dadd, dseq, dmax, dmin, dmul, dmuladd, drcp and dslt. They are used as suggested by Zack:
MOVD A.xy, C.xy, c.xy where x is the lsb and y is the msb. Some opcodes are still missing and are still being implemented (I will send the code soon); they are: dfrac, dfracexp, dldexp and conversion between float and double. Revision 2 update: In revision 2 we removed the create_double function, since it is not used, changed the MULADD opcode to DMAD, and added documentation for the new opcodes. Michal: I am looking at the double opcode branch; I can move the opcode code to use exec_double_binary/unary. Igor
From 83f895a235e76d8d556411fd0154650a2598acd0 Mon Sep 17 00:00:00 2001 From: Igor Oliveira <igor.olive...@openbossa.org> Date: Tue, 12 Jan 2010 07:40:50 -0400 Subject: [PATCH 1/3] tgsi: add double opcodes --- src/gallium/include/pipe/p_shader_tokens.h | 13 ++++++++++++- 1 files changed, 12 insertions(+), 1 deletions(-) diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 550e2ab..789edaa 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -319,7 +319,18 @@ struct tgsi_property_data { #define TGSI_OPCODE_CASE 142 #define TGSI_OPCODE_DEFAULT 143 #define TGSI_OPCODE_ENDSWITCH 144 -#define TGSI_OPCODE_LAST 145 + +#define TGSI_OPCODE_MOVD 145 +#define TGSI_OPCODE_DDIV 146 +#define TGSI_OPCODE_DADD 147 +#define TGSI_OPCODE_DSEQ 148 +#define TGSI_OPCODE_DMAX 149 +#define TGSI_OPCODE_DMIN 150 +#define TGSI_OPCODE_DMUL 151 +#define TGSI_OPCODE_DMAD 152 +#define TGSI_OPCODE_DRCP 153 +#define TGSI_OPCODE_DSLT 154 +#define TGSI_OPCODE_LAST 155 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ -- 1.6.3.3
From 91d50bdbd6f35af9a0e342c46c8ee5fbe0910421 Mon Sep 17 00:00:00 2001 From: Igor Oliveira <igor.olive...@openbossa.org> Date: Tue, 12 Jan 2010 07:41:08 -0400 Subject: [PATCH 2/3] tgsi: implement double opcodes --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 230 +++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_info.c | 10 + src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 11 +- 3 files changed, 249 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f43233b..4f2b29c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -69,6 +69,15 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +union tgsi_double { + struct int_double { + int lsb; + int msb; + double d; + } id; + double d; +}; + static void micro_abs(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -380,6 +389,184 @@ micro_trunc(union tgsi_exec_channel *dst, dst->f[3] = (float)(int)src->f[3]; } +static void +micro_movd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc, ddst; + + dsrc.id.lsb = src->u[0]; + dsrc.id.msb = src->u[1]; + + ddst.d = dsrc.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_dadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + ddst.d = dsrc0.d * dsrc1.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_ddiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + if (dsrc1.d != 0) { + ddst.d = dsrc0.d/dsrc1.d; + dst->u[0] = ddst.id.lsb; + dst->u[1] = 
ddst.id.msb; + } +} + +static void +micro_dseq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + ddst.d = dsrc0.d == dsrc1.d ? 1.0F : 0.0F; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_dslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + ddst.d = dsrc0.d < dsrc1.d ? 1.0F : 0.0F; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_dmax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].i[0]; + dsrc0.id.msb = src[0].i[1]; + + dsrc1.id.lsb = src[1].i[0]; + dsrc1.id.msb = src[1].i[1]; + + ddst.d = dsrc0.d > dsrc1.d ? dsrc0.d : dsrc1.d; + + dst->i[0] = ddst.id.lsb; + dst->i[1] = ddst.id.msb; +} + +static void +micro_dmin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + ddst.d = dsrc0.d < dsrc1.d ? 
dsrc0.d : dsrc1.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_dmul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + ddst.d = dsrc0.d * dsrc1.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_dmad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double dsrc0, dsrc1, dsrc2, ddst; + + dsrc0.id.lsb = src[0].u[0]; + dsrc0.id.msb = src[0].u[1]; + + dsrc1.id.lsb = src[1].u[0]; + dsrc1.id.msb = src[1].u[1]; + + dsrc2.id.lsb = src[2].u[0]; + dsrc2.id.msb = src[2].u[1]; + + ddst.d = dsrc0.d * dsrc1.d + dsrc2.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + +static void +micro_drcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + union tgsi_double ddst, dsrc; + + dsrc.id.lsb = src->u[0]; + dsrc.id.msb = src->u[1]; + + ddst.d = 1.0F / dsrc.d; + + dst->u[0] = ddst.id.lsb; + dst->u[1] = ddst.id.msb; +} + #define CHAN_X 0 #define CHAN_Y 1 @@ -3491,6 +3678,46 @@ exec_instruction( exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; + case TGSI_OPCODE_MOVD: + exec_vector_unary(mach, inst, micro_movd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DDIV: + exec_vector_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DADD: + exec_vector_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DSEQ: + exec_vector_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DMAX: + exec_vector_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DMIN: + exec_vector_binary(mach, inst, micro_dmin, 
TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DMUL: + exec_vector_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DMAD: + exec_vector_trinary(mach, inst, micro_dmad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DRCP: + exec_vector_unary(mach, inst, micro_drcp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_DSLT: + exec_vector_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_SWITCH: exec_switch(mach, inst); break; @@ -3503,7 +3730,8 @@ exec_instruction( exec_default(mach); break; - case TGSI_OPCODE_ENDSWITCH: + + case TGSI_OPCODE_ENDSWITCH: exec_endswitch(mach); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index de0e09c..c9bd51a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -171,6 +171,16 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 1, 0, 0, 0, 0, "MOVD", TGSI_OPCODE_MOVD }, + { 1, 2, 0, 0, 0, 0, "DDIV", TGSI_OPCODE_DDIV }, + { 1, 2, 0, 0, 0, 0, "DADD", TGSI_OPCODE_DADD }, + { 1, 2, 0, 0, 0, 0, "DSEQ", TGSI_OPCODE_DSEQ }, + { 1, 2, 0, 0, 0, 0, "DMAX", TGSI_OPCODE_DMAX }, + { 1, 2, 0, 0, 0, 0, "DMIN", TGSI_OPCODE_DMIN }, + { 1, 2, 0, 0, 0, 0, "DMUL", TGSI_OPCODE_DMUL }, + { 1, 3, 0, 0, 0, 0, "DMAD", TGSI_OPCODE_DMAD }, + { 1, 1, 0, 0, 0, 0, "DRCP", TGSI_OPCODE_DRCP }, + { 1, 2, 0, 0, 0, 0, "DSLT", TGSI_OPCODE_DSLT }, { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e4af15c..18b6573 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -167,7 +167,16 @@ OP12(USGE) OP12(USHR) OP12(USLT) OP12(USNE) - +OP11(MOVD) +OP12(DDIV) +OP12(DADD) +OP12(DSEQ) +OP12(DMAX) +OP12(DMIN) +OP12(DMUL) +OP13(DMAD) +OP11(DRCP) +OP12(DSLT) #undef OP00 #undef OP01 -- 1.6.3.3
From 10028fc475a6dbbcffd0f7c1e84394e822cee124 Mon Sep 17 00:00:00 2001 From: Igor Oliveira <igor.olive...@openbossa.org> Date: Tue, 12 Jan 2010 07:41:26 -0400 Subject: [PATCH 3/3] tgsi: add double opcodes documentation --- .../auxiliary/tgsi/tgsi-instruction-set.txt | 88 ++++++++++++++------ 1 files changed, 62 insertions(+), 26 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 080fd4c..57dcbad 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -26,8 +26,11 @@ TGSI Instruction Specification dst.z = src.z dst.w = src.w +1.1.3 DMOV - Move Double -1.1.3 LIT - Light Coefficients + dst.xy = src.xy + +1.1.4 LIT - Light Coefficients dst.x = 1.0 dst.y = max(src.x, 0.0) @@ -35,15 +38,18 @@ TGSI Instruction Specification dst.w = 1.0 -1.1.4 RCP - Reciprocal +1.1.5 RCP - Reciprocal dst.x = 1.0 / src.x dst.y = 1.0 / src.x dst.z = 1.0 / src.x dst.w = 1.0 / src.x +1.1.6 DRCP - Reciprocal Double + + dst.xy = 1.0 / src.xy -1.1.5 RSQ - Reciprocal Square Root +1.1.7 RSQ - Reciprocal Square Root dst.x = 1.0 / sqrt(abs(src.x)) dst.y = 1.0 / sqrt(abs(src.x)) @@ -51,7 +57,7 @@ TGSI Instruction Specification dst.w = 1.0 / sqrt(abs(src.x)) -1.1.6 EXP - Approximate Exponential Base 2 +1.1.8 EXP - Approximate Exponential Base 2 dst.x = pow(2.0, floor(src.x)) dst.y = src.x - floor(src.x) @@ -59,7 +65,7 @@ TGSI Instruction Specification dst.w = 1.0 -1.1.7 LOG - Approximate Logarithm Base 2 +1.1.9 LOG - Approximate Logarithm Base 2 dst.x = floor(lg2(abs(src.x))) dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x)))) @@ -67,23 +73,29 @@ TGSI Instruction Specification dst.w = 1.0 -1.1.8 MUL - Multiply +1.1.10 MUL - Multiply dst.x = src0.x * src1.x dst.y = src0.y * src1.y dst.z = src0.z * src1.z dst.w = src0.w * src1.w +1.1.11 DMUL - Multiple Double -1.1.9 ADD - Add + dst.xy = src0.xy * src1.xy + +1.1.12 ADD - Add dst.x = src0.x + src1.x 
dst.y = src0.y + src1.y dst.z = src0.z + src1.z dst.w = src0.w + src1.w +1.1.13 DADD - Add Double + + dst.x = src0.xy + src1.xy -1.1.10 DP3 - 3-component Dot Product +1.1.14 DP3 - 3-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z @@ -91,7 +103,7 @@ TGSI Instruction Specification dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z -1.1.11 DP4 - 4-component Dot Product +1.1.15 DP4 - 4-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w @@ -99,7 +111,7 @@ TGSI Instruction Specification dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w -1.1.12 DST - Distance Vector +1.1.16 DST - Distance Vector dst.x = 1.0 dst.y = src0.y * src1.y @@ -107,31 +119,40 @@ TGSI Instruction Specification dst.w = src1.w -1.1.13 MIN - Minimum +1.1.17 MIN - Minimum dst.x = min(src0.x, src1.x) dst.y = min(src0.y, src1.y) dst.z = min(src0.z, src1.z) dst.w = min(src0.w, src1.w) +1.1.18 DMIN - Minimum Double -1.1.14 MAX - Maximum + dst.xy = min(src0.xy, src1.xy) + +1.1.19 MAX - Maximum dst.x = max(src0.x, src1.x) dst.y = max(src0.y, src1.y) dst.z = max(src0.z, src1.z) dst.w = max(src0.w, src1.w) +1.1.20 DMAX = Maximum Double + + dst.xy = (src0.xy, src1.xy) -1.1.15 SLT - Set On Less Than +1.1.21 SLT - Set On Less Than dst.x = (src0.x < src1.x) ? 1.0 : 0.0 dst.y = (src0.y < src1.y) ? 1.0 : 0.0 dst.z = (src0.z < src1.z) ? 1.0 : 0.0 dst.w = (src0.w < src1.w) ? 1.0 : 0.0 +1.1.22 - DSLT - Double Set On Less Than + + dst.xy = (src0.xy < src1.xy) ? 1.0 : 0.0 -1.1.16 SGE - Set On Greater Equal Than +1.1.23 SGE - Set On Greater Equal Than dst.x = (src0.x >= src1.x) ? 1.0 : 0.0 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0 @@ -139,13 +160,16 @@ TGSI Instruction Specification dst.w = (src0.w >= src1.w) ? 
1.0 : 0.0 -1.1.17 MAD - Multiply And Add +1.1.24 MAD - Multiply And Add dst.x = src0.x * src1.x + src2.x dst.y = src0.y * src1.y + src2.y dst.z = src0.z * src1.z + src2.z dst.w = src0.w * src1.w + src2.w +1.1.25 DMAD - Multiply and Add Doubles + + dst.xy = src0.xy * src1.xy + src2.xy 1.2 GL_ATI_fragment_shader --------------------------- @@ -431,6 +455,9 @@ TGSI Instruction Specification dst.z = (src0.z == src1.z) ? 1.0 : 0.0 dst.w = (src0.w == src1.w) ? 1.0 : 0.0 +1.5.16 DSEQ - Set on Equal Double + + dst.xy = (src0.xy == src1.xy) ? 1.0F : 0.0F 1.5.17 SFL - Set On False @@ -632,8 +659,11 @@ TGSI Instruction Specification dst.z = src0.z / src1.z dst.w = src0.w / src1.w +1.9.3 DDIV - Divide Double + + dst.xy = src0.xy/src1.xy -1.9.3 DP2 - 2-component Dot Product +1.9.4 DP2 - 2-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y dst.y = src0.x * src1.x + src0.y * src1.y @@ -641,27 +671,27 @@ TGSI Instruction Specification dst.w = src0.x * src1.x + src0.y * src1.y -1.9.4 DP2A - 2-component Dot Product And Add +1.9.5 DP2A - 2-component Dot Product And Add Alias for DOT2ADD. -1.9.5 TXL - Texture Lookup With LOD +1.9.6 TXL - Texture Lookup With LOD TBD -1.9.6 BRK - Break +1.9.7 BRK - Break TBD -1.9.7 IF - If +1.9.8 IF - If TBD -1.9.8 BGNFOR - Begin a For-Loop +1.9.9 BGNFOR - Begin a For-Loop dst.x = floor(src.x) dst.y = floor(src.y) @@ -675,22 +705,22 @@ TGSI Instruction Specification The source must be a constant register. -1.9.9 REP - Repeat +1.9.10 REP - Repeat TBD -1.9.10 ELSE - Else +1.9.11 ELSE - Else TBD -1.9.11 ENDIF - End If +1.9.12 ENDIF - End If TBD -1.9.12 ENDFOR - End a For-Loop +1.9.13 ENDFOR - End a For-Loop dst.x = dst.x + dst.z dst.y = dst.y - 1.0 @@ -702,7 +732,7 @@ TGSI Instruction Specification Note: The destination must be a loop register. -1.9.13 ENDREP - End Repeat +1.9.14 ENDREP - End Repeat TBD @@ -1129,6 +1159,12 @@ TGSI Instruction Specification target Label of target instruction. 
+ src.xy First source register with double value + + src0.xy Second Source register with double value + + src1.xy Third Source register with double value + 3 Other tokens =============== -- 1.6.3.3
------------------------------------------------------------------------------ This SF.Net email is sponsored by the Verizon Developer Community Take advantage of Verizon's best-in-class app development support A streamlined, 14 day to market process makes app distribution fast and easy Join now and get one step closer to millions of Verizon customers http://p.sf.net/sfu/verizon-dev2dev
_______________________________________________ Mesa3d-dev mailing list Mesa3d-dev@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mesa3d-dev