[Mesa3d-dev] [PATCH] add double opcodes to tgsi

Igor Oliveira Mon, 11 Jan 2010 06:01:55 -0800

These patches add support for double-precision opcodes, as discussed on
the mailing list. The opcodes created are: movd, ddiv, dadd, dseq, dmax,
dmin, dmul, dmuladd, drcp and dslt.
They are used as suggested by Zack:


MOVD A.xy, C.xy, c.xy

where x is the lsb and y is the msb.

Some opcodes are still unimplemented (I will send the code soon):
dfrac, dfracexp, dldexp, and the conversions between float and double.

Igor

From 4eebdbbd2822157f063a84b3dcb425ddbab84104 Mon Sep 17 00:00:00 2001
From: Igor Oliveira <[email protected]>
Date: Mon, 11 Jan 2010 09:31:27 -0400
Subject: [PATCH 1/2] tgsi: add double opcodes to gallium

---
 src/gallium/include/pipe/p_shader_tokens.h |   13 ++++++++++++-
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 550e2ab..27125fc 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -319,7 +319,18 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_CASE                142
 #define TGSI_OPCODE_DEFAULT             143
 #define TGSI_OPCODE_ENDSWITCH           144
-#define TGSI_OPCODE_LAST                145
+
+#define TGSI_OPCODE_MOVD                145
+#define TGSI_OPCODE_DDIV                146
+#define TGSI_OPCODE_DADD                147
+#define TGSI_OPCODE_DSEQ                148
+#define TGSI_OPCODE_DMAX                149
+#define TGSI_OPCODE_DMIN                150
+#define TGSI_OPCODE_DMUL                151
+#define TGSI_OPCODE_DMULADD             152
+#define TGSI_OPCODE_DRCP                153
+#define TGSI_OPCODE_DSLT                154
+#define TGSI_OPCODE_LAST                155
 
 #define TGSI_SAT_NONE            0  /* do not saturate */
 #define TGSI_SAT_ZERO_ONE        1  /* clamp to [0,1] */
-- 
1.6.3.3

From 63048b005ffcba83064069619d1bd19145d5d515 Mon Sep 17 00:00:00 2001
From: Igor Oliveira <[email protected]>
Date: Mon, 11 Jan 2010 09:31:57 -0400
Subject: [PATCH 2/2] tgsi: implement double opcodes

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       |  274 +++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_info.c       |   10 +
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |   11 +-
 3 files changed, 293 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f43233b..3c37931 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -69,6 +69,15 @@
 #define TILE_BOTTOM_LEFT  2
 #define TILE_BOTTOM_RIGHT 3
 
+/* A double viewed either as a value (d) or as its two 32-bit words (id). */
+union tgsi_double {
+   struct int_double {
+      unsigned lsb;   /* low 32 bits of the encoding */
+      unsigned msb;   /* high 32 bits of the encoding */
+   } id;              /* lsb-first layout overlays d only on little-endian hosts */
+   double d;
+};
+
 static void
 micro_abs(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
@@ -380,6 +389,228 @@ micro_trunc(union tgsi_exec_channel *dst,
    dst->f[3] = (float)(int)src->f[3];
 }
 
+/**
+ * Rebuild a double from the two 32-bit halves of its IEEE-754 encoding.
+ * lsb holds bits 31..0 and msb bits 63..32; every bit pattern (zero,
+ * denormal, infinity, NaN) maps to the value it encodes.
+ */
+static double create_double(unsigned int lsb,
+                            unsigned int msb)
+{
+   union {
+      unsigned long long bits;
+      double value;
+   } pun;
+
+   /* Combine in an unsigned type: shifting a set bit into the sign
+    * position of a signed long long is undefined.  Reinterpreting the
+    * bits is exact, so no hand decoding of sign/exponent/mantissa.
+    */
+   pun.bits = ((unsigned long long)msb << 32) | lsb;
+
+   return pun.value;
+}
+
+/* MOVD: copy one double, held as two 32-bit words, from src to dst. */
+static void
+micro_movd(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double in, out;
+
+   /* NOTE(review): u[0]/u[1] are read as low/high word - confirm they are .x/.y. */
+   in.id.lsb = src->u[0];
+   in.id.msb = src->u[1];
+   out.d = in.d;
+   dst->u[0] = out.id.lsb;
+   dst->u[1] = out.id.msb;
+}
+
+/**
+ * DADD: dst = src[0] + src[1] in double precision.
+ * Each operand and the result occupy words u[0] (low) and u[1] (high).
+ */
+static void
+micro_dadd(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double dsrc0, dsrc1, ddst;
+
+   dsrc0.d = create_double(src[0].u[0], src[0].u[1]);
+   dsrc1.d = create_double(src[1].u[0], src[1].u[1]);
+
+   /* Addition, not multiplication: the product belongs to DMUL. */
+   ddst.d = dsrc0.d + dsrc1.d;
+
+   /* Split the sum back into its two 32-bit words. */
+   dst->u[0] = ddst.id.lsb;
+   dst->u[1] = ddst.id.msb;
+}
+
+/* DDIV: dst = src[0] / src[1]; dst stays unwritten when src[1] == 0. */
+static void
+micro_ddiv(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double num, den, quot;
+
+   num.id.lsb = src[0].u[0];
+   num.id.msb = src[0].u[1];
+   den.id.lsb = src[1].u[0];
+   den.id.msb = src[1].u[1];
+
+   if (den.d == 0)
+      return;
+   quot.d = num.d / den.d;
+   dst->u[0] = quot.id.lsb;
+   dst->u[1] = quot.id.msb;
+}
+
+/* DSEQ: dst = (src[0] == src[1]) ? 1.0 : 0.0, stored as a double. */
+static void
+micro_dseq(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   /* Operands arrive as low word u[0] / high word u[1]. */
+   const double a = create_double(src[0].u[0], src[0].u[1]);
+   const double b = create_double(src[1].u[0], src[1].u[1]);
+   union tgsi_double flag;
+
+   if (a == b)
+      flag.d = 1.0;
+   else
+      flag.d = 0.0;
+
+   /* The boolean result is itself a double split over two words. */
+   dst->u[0] = flag.id.lsb;
+   dst->u[1] = flag.id.msb;
+}
+
+/* DSLT: dst = (src[0] < src[1]) ? 1.0 : 0.0, stored as a double. */
+static void
+micro_dslt(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   /* Operands arrive as low word u[0] / high word u[1]. */
+   const double lhs = create_double(src[0].u[0], src[0].u[1]);
+   const double rhs = create_double(src[1].u[0], src[1].u[1]);
+   union tgsi_double flag;
+
+   flag.d = 0.0;
+   if (lhs < rhs)
+      flag.d = 1.0;
+
+   /* The boolean result is itself a double split over two words. */
+   dst->u[0] = flag.id.lsb;
+   dst->u[1] = flag.id.msb;
+
+}
+
+/**
+ * DMAX: dst = the larger of src[0] and src[1] in double precision.
+ * src[1] is chosen whenever src[0] > src[1] is false.
+ */
+static void
+micro_dmax(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double dsrc0, dsrc1, ddst;
+
+   /* Raw bit patterns: read and write the unsigned view, as the other
+    * double opcodes do, rather than the signed i[] view. */
+   dsrc0.d = create_double(src[0].u[0], src[0].u[1]);
+   dsrc1.d = create_double(src[1].u[0], src[1].u[1]);
+
+   ddst.d = dsrc0.d > dsrc1.d ? dsrc0.d : dsrc1.d;
+
+   dst->u[0] = ddst.id.lsb;
+   dst->u[1] = ddst.id.msb;
+}
+
+/**
+ * DMIN: dst = the smaller of src[0] and src[1] in double precision.
+ * src[1] is chosen whenever src[0] < src[1] is false.
+ */
+static void
+micro_dmin(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   const double a = create_double(src[0].u[0], src[0].u[1]);
+   const double b = create_double(src[1].u[0], src[1].u[1]);
+   union tgsi_double lowest;
+
+   lowest.d = b;
+   if (a < b)
+      lowest.d = a;
+
+   /* Split the result into low word u[0] and high word u[1]. */
+   dst->u[0] = lowest.id.lsb;
+   dst->u[1] = lowest.id.msb;
+}
+
+/**
+ * DMUL: dst = src[0] * src[1] in double precision.
+ * Each operand and the result use u[0] as low word and u[1] as high word.
+ */
+static void
+micro_dmul(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double product;
+   double lhs, rhs;
+
+   lhs = create_double(src[0].u[0], src[0].u[1]);
+   rhs = create_double(src[1].u[0], src[1].u[1]);
+
+   product.d = lhs * rhs;
+
+   /* Hand the 64-bit result back as two 32-bit words. */
+   dst->u[0] = product.id.lsb;
+   dst->u[1] = product.id.msb;
+}
+
+/**
+ * DMULADD: dst = src[0] * src[1] + src[2] in double precision.
+ *
+ * Each operand and the result use u[0] as the low word and u[1] as the
+ * high word of the double.
+ */
+static void
+micro_dmad(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double result;
+   double factor0, factor1, addend;
+
+   factor0 = create_double(src[0].u[0], src[0].u[1]);
+   factor1 = create_double(src[1].u[0], src[1].u[1]);
+   addend = create_double(src[2].u[0], src[2].u[1]);
+
+   /* Multiply the first two operands, then add the third. */
+   result.d = factor0 * factor1 + addend;
+
+   /* Hand the 64-bit result back as two 32-bit words. */
+   dst->u[0] = result.id.lsb;
+   dst->u[1] = result.id.msb;
+}
+
+/* DRCP: dst = 1.0 / src in double precision; a zero operand is not checked. */
+static void
+micro_drcp(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src)
+{
+   union tgsi_double reciprocal;
+   double value;
+
+   /* u[0] is taken as the low word and u[1] as the high word. */
+   value = create_double(src->u[0], src->u[1]);
+
+   reciprocal.d = 1.0 / value;
+
+   dst->u[0] = reciprocal.id.lsb;
+   dst->u[1] = reciprocal.id.msb;
+}
+
 
 #define CHAN_X  0
 #define CHAN_Y  1
@@ -3491,6 +3722,46 @@ exec_instruction(
       exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
       break;
 
+   case TGSI_OPCODE_MOVD:
+      exec_vector_unary(mach, inst, micro_movd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DDIV:
+      exec_vector_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DADD:
+      exec_vector_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DSEQ:
+      exec_vector_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DMAX:
+      exec_vector_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DMIN:
+      exec_vector_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DMUL:
+      exec_vector_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DMULADD:
+      exec_vector_trinary(mach, inst, micro_dmad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DRCP:
+      exec_vector_unary(mach, inst, micro_drcp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_DSLT:
+      exec_vector_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      break;
+
    case TGSI_OPCODE_SWITCH:
       exec_switch(mach, inst);
       break;
@@ -3503,7 +3774,8 @@ exec_instruction(
       exec_default(mach);
       break;
 
-   case TGSI_OPCODE_ENDSWITCH:
+
+       case TGSI_OPCODE_ENDSWITCH:
       exec_endswitch(mach);
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index de0e09c..f3641e3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -171,6 +171,16 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE },
    { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR },
    { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT },
+   { 1, 1, 0, 0, 0, 0, "MOVD", TGSI_OPCODE_MOVD },
+   { 1, 2, 0, 0, 0, 0, "DDIV", TGSI_OPCODE_DDIV },
+   { 1, 2, 0, 0, 0, 0, "DADD", TGSI_OPCODE_DADD },
+   { 1, 2, 0, 0, 0, 0, "DSEQ", TGSI_OPCODE_DSEQ },
+   { 1, 2, 0, 0, 0, 0, "DMAX", TGSI_OPCODE_DMAX },
+   { 1, 2, 0, 0, 0, 0, "DMIN", TGSI_OPCODE_DMIN },
+   { 1, 2, 0, 0, 0, 0, "DMUL", TGSI_OPCODE_DMUL },
+   { 1, 3, 0, 0, 0, 0, "DMAD", TGSI_OPCODE_DMULADD },
+   { 1, 1, 0, 0, 0, 0, "DRCP", TGSI_OPCODE_DRCP },
+   { 1, 2, 0, 0, 0, 0, "DSLT", TGSI_OPCODE_DSLT },
    { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE },
    { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH },
    { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e4af15c..595653b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -167,7 +167,16 @@ OP12(USGE)
 OP12(USHR)
 OP12(USLT)
 OP12(USNE)
-
+OP11(MOVD)
+OP12(DDIV)
+OP12(DADD)
+OP12(DSEQ)
+OP12(DMAX)
+OP12(DMIN)
+OP12(DMUL)
+OP13(DMULADD)
+OP11(DRCP)
+OP12(DSLT)
 
 #undef OP00
 #undef OP01
-- 
1.6.3.3

------------------------------------------------------------------------------
This SF.Net email is sponsored by the Verizon Developer Community
Take advantage of Verizon's best-in-class app development support
A streamlined, 14 day to market process makes app distribution fast and easy
Join now and get one step closer to millions of Verizon customers
http://p.sf.net/sfu/verizon-dev2dev

_______________________________________________
Mesa3d-dev mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

[Mesa3d-dev] [PATCH] add double opcodes to tgsi

Reply via email to