Hi Igor,

The overall intent is good, but this creates 4 x 64-bit = 256-bit registers, which
don't exist. LLVM can split them into 128-bit instructions, but I found that to be
buggy in some cases, and it affects our ability to use SSE intrinsics.

It is also unnecessary for vertical operations such as multiplication.

So I'd prefer that you create a <2*double> vector, and issue two 
multiplications per channel, and do the same for other double opcodes. 

Is there any double opcode for which this would not work?

A few minor details: instead of lp_types_to_double/lp_double_to_types, I'd 
prefer cast_to_double, cast_from_double; and the double type should be computed 
once and stored in the tgsi build context.  

________________________________________
From: Igor Oliveira [igor.olive...@openbossa.org]
Sent: Thursday, September 16, 2010 3:24
To: mesa3d-dev
Subject: [Mesa3d-dev] Gallium double opcodes

Hi,

I am reviving the gallium double opcode branch and doing some work in the LLVM
driver. Previously the tests were done using python/st, but it looks like
it is a little bit outdated. So I am just sending the code below for
review and suggestions (I am new to the LLVM API); basically all the other
opcodes would be done using the same logic.


diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index ca8db9c..29892d9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -970,6 +970,60 @@ emit_kil(
       lp_build_mask_update(bld->mask, mask);
 }

+static LLVMValueRef
+lp_types_to_double(struct lp_build_context *bld,
+                   LLVMValueRef a,
+                   LLVMValueRef b)
+{
+   LLVMValueRef res;
+   struct lp_type type;
+   LLVMTypeRef vec_type;
+   LLVMTypeRef vec_double_type;
+
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+
+   type = lp_type_uint(64);
+   type.length = bld->type.length;
+
+   vec_type = lp_build_vec_type(type);
+   a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, vec_type, "");
+
+   res = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type, 32),"");
+   res = LLVMBuildOr(bld->builder, res, b, "");
+
+   a = LLVMBuildBitCast(bld->builder, a, bld->vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+
+   type = lp_type_float(64);
+   type.length = bld->type.length;
+   vec_double_type = lp_build_vec_type(type);
+   res = LLVMBuildBitCast(bld->builder, res, vec_double_type, "");
+
+   return res;
+}
+
+static void
+lp_double_to_types(struct lp_build_context *bld,
+                   LLVMValueRef double_value,
+                   LLVMValueRef a,
+                   LLVMValueRef b)
+{
+   LLVMTypeRef double_type;
+   struct lp_type type = lp_type_uint(64);
+   type.length = bld->type.length;
+
+   double_type = lp_build_vec_type(type);
+
+   a = LLVMBuildBitCast(bld->builder, double_value, double_type, "");
+
+   b = LLVMBuildAnd(bld->builder, a, lp_build_const_int_vec(type,
0x00000000FFFFFFFF), "");
+
+   a = LLVMBuildBitCast(bld->builder, a, bld->vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+}
+

 /**
  * Predicated fragment kill.
@@ -1988,6 +2042,34 @@ emit_instruction(
    case TGSI_OPCODE_NOP:
       break;

+   case TGSI_OPCODE_DMUL:
+      if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) &&
IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+
+         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
+         tmp3 = emit_fetch( bld, inst, 1, CHAN_Y );
+
+         src0 = lp_types_to_double(&bld->base, tmp0, tmp1);
+         src1 = lp_types_to_double(&bld->base, tmp2, tmp3);
+         tmp4 = lp_build_mul(&bld->base, src0, src1);
+         lp_double_to_types(&bld->base, tmp4, dst0[CHAN_X], dst0[CHAN_Y]);
+      }
+
+      if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) &&
IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
+         tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
+         tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
+
+         tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+         tmp3 = emit_fetch( bld, inst, 1, CHAN_W );
+
+         src0 = lp_types_to_double(&bld->base, tmp0, tmp1);
+         src1 = lp_types_to_double(&bld->base, tmp2, tmp3);
+         tmp4 = lp_build_mul(&bld->base, src0, src1);
+         lp_double_to_types(&bld->base, tmp4, dst0[CHAN_Z], dst0[CHAN_W]);
+      }
+      break;
+
    default:
       return FALSE;
    }

------------------------------------------------------------------------------
Start uncovering the many advantages of virtual appliances
and start using them to simplify application deployment and
accelerate your shift to cloud computing.
http://p.sf.net/sfu/novell-sfdev2dev
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

------------------------------------------------------------------------------
Start uncovering the many advantages of virtual appliances
and start using them to simplify application deployment and
accelerate your shift to cloud computing.
http://p.sf.net/sfu/novell-sfdev2dev
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to