On Mon, Feb 7, 2011 at 6:20 PM, Stefan Dösinger <stefandoesin...@gmx.at> wrote: > -----BEGIN PGP SIGNED MESSAGE----- > Hash: SHA1 > > > Am 07.02.2011 um 23:49 schrieb Misha Koshelev: > I have no full explanation yet, just bits and pieces. By the way, if you > haven't done so already I recommend a thorough study of > http://en.wikipedia.org/wiki/Floating_point for background knowledge. > http://en.wikipedia.org/wiki/IEEE_754-2008 may also be interesting, but I > haven't read that wikipedia page so I don't really know what is in it. > >> I believe I've found that the threshold value is actually 65520.0 for >> conversions from single to half precision > That makes sense, it is halfway between 65536 and 65504. At this threshold > values are rounded up or down > >> In any case, I have gotten the half to single precision conversion to >> work in Wine by extending to 16 bits for the exponent (changing < 31 >> to < 32). > You may as well replace it with TRUE because the exponent won't be >= 32. Too > few bits to express anything bigger than 31. > >> However, I am still having trouble with the single to half precision >> and believe I am spinning my wheels a bit at this point. > I'm still trying to make sense of the Windows values :-/ > >> math.c:2249: Test failed: Got 7800, expected 7bff or 7bff for index 3. > Looks like the exponent is 1 too high and the mantissa zero instead of 1023. > Probably a rounding / off by one bug in the code from wined3d. Different > behavior wrt what happens when you exactly hit the rounding threshold. (Ie, > do you round an exact 5 up or down(to 0 or 10)?) > >> math.c:2249: Test failed: Got ffff, expected fc00 or fce2 for index 8. >> math.c:2249: Test failed: Got ffff, expected fc00 or fc00 for index 12. >> math.c:2249: Test failed: Got ffff, expected fc00 or fc00 for index 13. >> math.c:2249: Test failed: Got ffff, expected fc00 or fc00 for index 14. >> math.c:2249: Test failed: Got ffff, expected fc00 or fc00 for index 15. 
> This is this statement: > + if (*in < -65520) return 0xFFFF; > You probably wanted to return fc00 here, if you are trying to emulate case 1. > Otherwise remove both CONFUSION HERE statements, change the if(exp > 30) case > to exp > 31 and set ret = 0x7fff there. This will give you case 2. > >> math.c:2249: Test failed: Got 6400, expected 7fff or 7fff for index 18. >> math.c:2249: Test failed: Got 6400, expected ffff or ffff for index 19. > Hmm, I am afraid you can't remove the check for NaN entirely. If you try to > any float operation on NaN undefined things are going to happen(Well, you get > NaN, and later try to pick its bits apart) > > -----BEGIN PGP SIGNATURE----- > Version: GnuPG/MacGPG2 v2.0.17 (Darwin) > > iQIcBAEBAgAGBQJNUH4mAAoJEN0/YqbEcdMwiNcP/R4sa+Z6L39FtN8qE1nX+Pwj > KER3a4MWQ8DWDQgjXR09l3kYua9tM8kH5XmZNNpthH5Uck/CCdFOfCx+j3+CLuJq > nbFkT5kwVA4qzv67o9qJxJhCaL2+9rXtB6ADOSU3QfLYn7L/UXzxwENzxgQlvFG5 > +0GzkdlimMGB8+Rj8mYte/ORFEQQnLoCFe3d1UT9RQQhOCqQvlDuaptBWQCiFiAc > GgSQuBzC7YbZxLz8i0EDjfn50X5lW0iVo0p682RJzmod+oCeFztjY/G33wGyA1MA > 45Ui6onTYpN1OES1esw/WtePkM8dKrSDlw+gu5rmU9c8JwhPFnNvs/1FUvHD1bGG > WjHkv3hLCf3XVjMk1aT70ItFQ8ph2b6IlU3GOSvQvrYbDB8jfb7lbJnuqzd4hrd4 > ycTzAf5YULwuQ5m7Ie5QtWO+hG0iB/I0aqGIAnNj6tSO3b/bfmBlQKAauVnoUnDq > sYgIEmW0GO15Ndsd/ipfbh414qQsNPyGQNdnhivkJK2PrVaiyXb/ztJf/vLDKjEw > cRPoaAfQyIsfPD+cqKq4pnw2rwvr2tkOby11UQ8jfDeaTldp3redf0py0o37CGq1 > 520QC0CqC/A4MHawwbgcbFTi6XX/DCkHW4IOEirJvnweV8mw8V9IP4XyY/xmYepV > ukB4hwY8XvIe4/Zxj8IP > =bs4K > -----END PGP SIGNATURE----- >
Wow, thanks so much. That really helped. The attached patch now passes all Wine tests and Windows tests as well: https://testbot.winehq.org/JobDetails.pl?Key=9020 I had to use the signbit function as the > 0.0f check does not work for NaNs. Any other suggestions for the patch before re-submission to wine-patches? And sorry if I missed it, but is Henri on vacation? Thanks Misha
From 06d992ea5e55a36222b2e426fe66be55fe2aaa96 Mon Sep 17 00:00:00 2001 From: Misha Koshelev <misha...@gmail.com> Date: Mon, 7 Feb 2011 20:28:01 -0500 Subject: d3dx9: Implement D3DXFloat32To16Array and D3DXFloat16To32Array. To: wine-patches <wine-patc...@winehq.org> Reply-To: wine-devel <wine-devel@winehq.org> --- dlls/d3dx9_36/d3dx9_36.spec | 4 +- dlls/d3dx9_36/math.c | 87 +++++++++++++++++++++++++++++++++++++++++++ dlls/d3dx9_36/tests/math.c | 48 +++++++++++++++++++++++ include/d3dx9math.h | 18 +++++++++ include/d3dx9math.inl | 31 +++++++++++++++ 5 files changed, 186 insertions(+), 2 deletions(-) diff --git a/dlls/d3dx9_36/d3dx9_36.spec b/dlls/d3dx9_36/d3dx9_36.spec index cbb6d20..55fe0cb 100644 --- a/dlls/d3dx9_36/d3dx9_36.spec +++ b/dlls/d3dx9_36/d3dx9_36.spec @@ -130,8 +130,8 @@ @ stub D3DXFillVolumeTextureTX @ stdcall D3DXFilterTexture(ptr ptr long long) @ stdcall D3DXFindShaderComment(ptr long ptr ptr) -@ stub D3DXFloat16To32Array -@ stub D3DXFloat32To16Array +@ stdcall D3DXFloat16To32Array(ptr ptr long) +@ stdcall D3DXFloat32To16Array(ptr ptr long) @ stub D3DXFrameAppendChild @ stub D3DXFrameCalculateBoundingSphere @ stub D3DXFrameDestroy diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c index fdb5f92..d2617ac 100644 --- a/dlls/d3dx9_36/math.c +++ b/dlls/d3dx9_36/math.c @@ -1769,3 +1769,90 @@ D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, CON } return out; } + +static inline unsigned short float_32_to_16(const float *in) +{ + int exp = 0; + float tmp = fabs(*in); + unsigned int mantissa; + unsigned short ret; + + /* Deal with special numbers */ + if (*((unsigned int *) in) == 0x00000000) return 0x0000; + if (*((unsigned int *) in) == 0x80000000) return 0x8000; + if (isinf(*in)) return (signbit(*in) ? 0xffff : 0x7c00); + if (isnan(*in)) return (signbit(*in) ? 0xffff : 0x7fff); + if (fabs(*in) > 65520) return (signbit(*in) ? 
0xfc00 : 0x7c00); + + if (tmp < powf(2, 10)) { + do + { + tmp = tmp * 2.0f; + exp--; + } while (tmp < powf(2, 10)); + } else if (tmp >= powf(2, 11)) { + do + { + tmp /= 2.0f; + exp++; + } while (tmp >= powf(2, 11)); + } + + mantissa = (unsigned int) tmp; + if (tmp - mantissa > 0.5f) mantissa++; /* round to nearest, away from zero */ + + exp += 10; /* Normalize the mantissa */ + exp += 15; /* Exponent is encoded with excess 15 */ + + if (exp > 31) { /* too big */ + ret = 0x7fff; /* INF */ + } else if (exp <= 0) { + /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers */ + while (exp <= 0) { + mantissa = mantissa >> 1; + exp++; + } + ret = mantissa & 0x3ff; + } else { + ret = (exp << 10) | (mantissa & 0x3ff); + } + + ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */ + return ret; +} + +D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, CONST FLOAT *pin, UINT n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + pout[i].value = float_32_to_16(&pin[i]); + } + + return pout; +} + +static inline float float_16_to_32(const unsigned short *in) { + const unsigned short s = ((*in) & 0x8000); + const unsigned short e = ((*in) & 0x7C00) >> 10; + const unsigned short m = (*in) & 0x3FF; + const float sgn = (s ? 
-1.0f : 1.0f); + + if (e == 0) { + if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */ + else return sgn * powf(2, -14.0f) * ((float)m / 1024.0f); + } else { + return sgn * powf(2, (float)e - 15.0f) * (1.0f + ((float)m / 1024.0f)); + } +} + +FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + pout[i] = float_16_to_32(&pin[i].value); + } + + return pout; +} diff --git a/dlls/d3dx9_36/tests/math.c b/dlls/d3dx9_36/tests/math.c index e455a96..0d8c5eb 100644 --- a/dlls/d3dx9_36/tests/math.c +++ b/dlls/d3dx9_36/tests/math.c @@ -21,6 +21,7 @@ #include "wine/test.h" #include "d3dx9.h" +#include <math.h> #define ARRAY_SIZE 5 @@ -2216,6 +2217,52 @@ static void test_D3DXVec_Array(void) compare_planes(exp_plane, out_plane); } +static void test_D3DXFloat_Array(void) +{ + unsigned int i; + void *out = NULL; + D3DXFLOAT16 half_res[22]; + FLOAT single[22] = { 80000.0f, 65503.0f, 65504.0f, 65520.0f, 65521.0f, 65534.0f, 65535.0f, 65536.0f, + -80000.0f, -65503.0f, -65504.0f, -65520.0f, -65521.0f, -65534.0f, -65535.0f, -65536.0f, + INFINITY, -INFINITY, NAN, -NAN, 0.0f, -0.0f }, + single_exp[22] = { 65536.0f, 65504.0f, 65504.0f, 65504.0f, 65536.0f, 65536.0f, 65535.0f, 65536.0f, + -65536.0f, -65504.0f, -65504.0f, -65504.0f, -65536.0f, -65536.0f, -65535.0f, -65536.0f, + 65536.0f, -131008.0f, 131008.0f, -131008.0f, 0.0f, -0.0f }, + single_res[22]; + /* half_ver2 occurs on WXPPROSP3 (32 bit math), WVISTAADM (32 bit math), W7PRO (32 bit math) */ + WORD half_ver1[22] = { 0x7c00, 0x7bff, 0x7bff, 0x7bff, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0xfc00, 0xfbff, 0xfbff, 0xfbff, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0x7c00, 0xffff, 0x7fff, 0xffff, 0x0, 0x8000 }, + half_ver2[22] = { 0x7ce2, 0x7bff, 0x7bff, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0xfce2, 0xfbff, 0xfbff, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0x7fff, 0xffff, 0x7fff, 0xffff, 0x0, 0x8000 }; + + /* exception on NULL out or in parameter */ + out = 
D3DXFloat32To16Array(half_res, single, 0); + ok(out == half_res, "Got %p, expected %p.\n", out, half_res); + + out = D3DXFloat32To16Array(half_res, single, 22); + ok(out == half_res, "Got %p, expected %p.\n", out, half_res); + + for (i = 0; i < 22; i++) { + ok(half_res[i].value == half_ver1[i] || + half_res[i].value == half_ver2[i], "Got %x, expected %x or %x for index %d.\n", + half_res[i].value, half_ver1[i], half_ver2[i], i); + } + + /* exception on NULL out or in parameter */ + out = D3DXFloat16To32Array(single_res, (D3DXFLOAT16 *)half_ver1, 0); + ok(out == single_res, "Got %p, expected %p.\n", out, single_res); + + out = D3DXFloat16To32Array(single_res, (D3DXFLOAT16 *)half_ver1, 22); + ok(out == single_res, "Got %p, expected %p.\n", out, single_res); + + for (i = 0; i < 22; i++) { + ok(relative_error(single_res[i], single_exp[i]) < admitted_error, "Got %f, expected %f for index %d.\n", + single_res[i], single_exp[i], i); + } +} + START_TEST(math) { D3DXColorTest(); @@ -2231,4 +2278,5 @@ START_TEST(math) test_Matrix_Decompose(); test_Matrix_Transformation2D(); test_D3DXVec_Array(); + test_D3DXFloat_Array(); } diff --git a/include/d3dx9math.h b/include/d3dx9math.h index f842e3e..cdb1deb 100644 --- a/include/d3dx9math.h +++ b/include/d3dx9math.h @@ -261,6 +261,21 @@ typedef struct D3DXCOLOR FLOAT r, g, b, a; } D3DXCOLOR, *LPD3DXCOLOR; +typedef struct D3DXFLOAT16 +{ +#ifdef __cplusplus + D3DXFLOAT16(); + D3DXFLOAT16(FLOAT f); + D3DXFLOAT16(CONST D3DXFLOAT16 &f); + + operator FLOAT (); + + BOOL operator == (CONST D3DXFLOAT16 &) const; + BOOL operator != (CONST D3DXFLOAT16 &) const; +#endif /* __cplusplus */ + WORD value; +} D3DXFLOAT16, *LPD3DXFLOAT16; + #ifdef __cplusplus extern "C" { #endif @@ -358,6 +373,9 @@ D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, CONST D3DXVECTOR4 *pv); D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, CONST D3DXVECTOR4 *pv, CONST D3DXMATRIX *pm); D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4 *pout, UINT 
outstride, CONST D3DXVECTOR4 *pv, UINT vstride, CONST D3DXMATRIX *pm, UINT n); +D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, CONST FLOAT *pin, UINT n); +FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n); + #ifdef __cplusplus } #endif diff --git a/include/d3dx9math.inl b/include/d3dx9math.inl index 3cd078a..3f55aef 100644 --- a/include/d3dx9math.inl +++ b/include/d3dx9math.inl @@ -851,6 +851,37 @@ inline BOOL D3DXCOLOR::operator != (CONST D3DXCOLOR& col) const return r != col.r || g != col.g || b != col.b || a != col.a; } +inline D3DXFLOAT16::D3DXFLOAT16() +{ +} + +inline D3DXFLOAT16::D3DXFLOAT16(FLOAT f) +{ + D3DXFloat32To16Array(this, &f, 1); +} + +inline D3DXFLOAT16::D3DXFLOAT16(CONST D3DXFLOAT16 &f) +{ + value = f.value; +} + +inline D3DXFLOAT16::operator FLOAT () +{ + FLOAT f; + D3DXFloat16To32Array(&f, this, 1); + return f; +} + +inline BOOL D3DXFLOAT16::operator == (CONST D3DXFLOAT16 &f) const +{ + return value == f.value; +} + +inline BOOL D3DXFLOAT16::operator != (CONST D3DXFLOAT16 &f) const +{ + return value != f.value; +} + #endif /* __cplusplus */ /*_______________D3DXCOLOR_____________________*/ -- 1.7.4