Author: awatry
Date: Mon Aug 19 13:31:49 2013
New Revision: 188684

URL: http://llvm.org/viewvc/llvm-project?rev=188684&view=rev
Log:
Add mul_hi implementation [v2]
Everything except long/ulong is handled by just casting to the next larger type, doing the math and then shifting/casting the result.

For 64-bit types, we break the high/low parts of each operand apart, and do a FOIL-based multiplication.

v2: Discard the stack-overflow implementation due to copyright concerns.
  - The implementation is still FOIL-based, but discards the previous code.

Reviewed-by: Tom Stellard <[email protected]>

Added:
    libclc/trunk/generic/include/clc/integer/mul_hi.h
    libclc/trunk/generic/include/clc/integer/mul_hi.inc
    libclc/trunk/generic/lib/integer/mul_hi.cl
Modified:
    libclc/trunk/generic/include/clc/clc.h
    libclc/trunk/generic/lib/SOURCES

Modified: libclc/trunk/generic/include/clc/clc.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/clc.h?rev=188684&r1=188683&r2=188684&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/clc.h (original)
+++ libclc/trunk/generic/include/clc/clc.h Mon Aug 19 13:31:49 2013
@@ -68,6 +68,7 @@
 #include <clc/integer/hadd.h>
 #include <clc/integer/mad24.h>
 #include <clc/integer/mul24.h>
+#include <clc/integer/mul_hi.h>
 #include <clc/integer/rhadd.h>
 #include <clc/integer/rotate.h>
 #include <clc/integer/sub_sat.h>

Added: libclc/trunk/generic/include/clc/integer/mul_hi.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/integer/mul_hi.h?rev=188684&view=auto
==============================================================================
--- libclc/trunk/generic/include/clc/integer/mul_hi.h (added)
+++ libclc/trunk/generic/include/clc/integer/mul_hi.h Mon Aug 19 13:31:49 2013
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/integer/mul_hi.inc>
+#include <clc/integer/gentype.inc>

Added: libclc/trunk/generic/include/clc/integer/mul_hi.inc
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/integer/mul_hi.inc?rev=188684&view=auto
============================================================================== --- libclc/trunk/generic/include/clc/integer/mul_hi.inc (added) +++ libclc/trunk/generic/include/clc/integer/mul_hi.inc Mon Aug 19 13:31:49 2013 @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y); Modified: libclc/trunk/generic/lib/SOURCES URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/SOURCES?rev=188684&r1=188683&r2=188684&view=diff ============================================================================== --- libclc/trunk/generic/lib/SOURCES (original) +++ libclc/trunk/generic/lib/SOURCES Mon Aug 19 13:31:49 2013 @@ -14,6 +14,7 @@ integer/clz_impl.ll integer/hadd.cl integer/mad24.cl integer/mul24.cl +integer/mul_hi.cl integer/rhadd.cl integer/rotate.cl integer/sub_sat.cl Added: libclc/trunk/generic/lib/integer/mul_hi.cl URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/integer/mul_hi.cl?rev=188684&view=auto ============================================================================== --- libclc/trunk/generic/lib/integer/mul_hi.cl (added) +++ libclc/trunk/generic/lib/integer/mul_hi.cl Mon Aug 19 13:31:49 2013 @@ -0,0 +1,109 @@ +#include <clc/clc.h> + +//For all types EXCEPT long, which is implemented separately +#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ + _CLC_OVERLOAD _CLC_DECL GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \ + return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ + } \ + +//FOIL-based long mul_hi +// +// Summary: Treat mul_hi(long x, long y) as: +// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively +// and b and d are the low-order parts of x and y. +// Thinking back to algebra, we use FOIL to do the work. + +_CLC_OVERLOAD _CLC_DECL long mul_hi(long x, long y){ + long f, o, i; + ulong l; + + //Move the high/low halves of x/y into the lower 32-bits of variables so + //that we can multiply them without worrying about overflow. 
+ long x_hi = x >> 32; + long x_lo = x & UINT_MAX; + long y_hi = y >> 32; + long y_lo = y & UINT_MAX; + + //Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + //Now add the components back together in the following steps: + //F: doesn't need to be modified + //O/I: Need to be added together. + //L: Shift right by 32-bits, then add into the sum of O and I + //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. + // + //We use hadd to give us a bit of extra precision for the intermediate sums + //but as a result, we shift by 31 bits instead of 32 + return (long)(f + (hadd(o, (i + (long)((ulong)l>>32))) >> 31)); +} + +_CLC_OVERLOAD _CLC_DECL ulong mul_hi(ulong x, ulong y){ + ulong f, o, i; + ulong l; + + //Move the high/low halves of x/y into the lower 32-bits of variables so + //that we can multiply them without worrying about overflow. + ulong x_hi = x >> 32; + ulong x_lo = x & UINT_MAX; + ulong y_hi = y >> 32; + ulong y_lo = y & UINT_MAX; + + //Multiply all of the components according to FOIL method + f = x_hi * y_hi; + o = x_hi * y_lo; + i = x_lo * y_hi; + l = x_lo * y_lo; + + //Now add the components back together, taking care to respect the fact that: + //F: doesn't need to be modified + //O/I: Need to be added together. + //L: Shift right by 32-bits, then add into the sum of O and I + //Once O/I/L are summed up, then shift the sum by 32-bits and add to F. 
+ // + //We use hadd to give us a bit of extra precision for the intermediate sums + //but as a result, we shift by 31 bits instead of 32 + return (f + (hadd(o, (i + (l>>32))) >> 31)); +} + +#define __CLC_MUL_HI_VEC(GENTYPE) \ + _CLC_OVERLOAD _CLC_DECL GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \ + return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \ + } \ + _CLC_OVERLOAD _CLC_DECL GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \ + return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)}; \ + } \ + _CLC_OVERLOAD _CLC_DECL GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \ + return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DECL GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \ + return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ + } \ + _CLC_OVERLOAD _CLC_DECL GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \ + return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \ + } \ + +#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ + __CLC_MUL_HI_VEC(TYPE) + +#define __CLC_MUL_HI_TYPES() \ + __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ + __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ + __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ + __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ + __CLC_MUL_HI_DEC_IMPL(long, int, 32) \ + __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \ + __CLC_MUL_HI_VEC(long) \ + __CLC_MUL_HI_VEC(ulong) + +__CLC_MUL_HI_TYPES() + +#undef __CLC_MUL_HI_TYPES +#undef __CLC_MUL_HI_DEC_IMPL +#undef __CLC_MUL_HI_IMPL +#undef __CLC_MUL_HI_VEC +#undef __CLC_B32 _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
