While hacking away on Eterm I noticed that when X is in 16 bpp mode and Eterm is invoked with the option: --cmod 0x100 everything is great. When cmod < 0x100 everything is good. When cmod > 0x100 the Eterm window gets trashed.
Would someone please confirm this and if that happens to be the case then the attached patch will correct the problem. Notice that in the patch I moved the local integers r, g, and b outside of the inner loop. This will prevent the compiler from creating room on the stack for them each time the inner loop starts (for every line). The instruction order has been reworked in the spirit of my last optimization patches so that the processor stays with a color as much as possible too. My apologies for the patch not being as MeJ instructed: cvs diff -Nu > Eterm-0.9.4-something_spiffy.patch but that failed because of the state of disrepair in my Eterm tree and because I don't have a CVSROOT defined. Until I fix the first I can't fix the second. The 16bpp 64 bit MMX patch is failing to match the output of the C code (even after this patch) but at least now I can concentrate on the assembly code. I suspect that the same problem is present in 15bpp mode as well but I haven't gotten to that routine yet. -- Tres
--- Eterm-0.9.3-orig/src/pixmap.c 2004-07-22 14:12:31.000000000 -0600 +++ Eterm-0.9.3/src/pixmap.c 2005-05-06 08:30:20.000000000 -0600 @@ -53,9 +53,40 @@ #include "windows.h" /* Assembler routines */ -extern void shade_ximage_15_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); -extern void shade_ximage_16_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); -extern void shade_ximage_32_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); +/* We don't want these defined if they are not being used. We want them to fail to find the bug */ +#ifdef HAVE_MMX + extern void shade_ximage_15_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); + extern void shade_ximage_16_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); + extern void shade_ximage_24_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); + extern void shade_ximage_32_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm); +#elif 0 +//#elifdef HAVE_MMX_64 +/* these inlines will be supplanted as each asm_mmx_64 function becomes available */ + +/* extern void shade_ximage_15_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */ + inline void shade_ximage_15_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm) + { + shade_ximage_15_mmx_64(data, bpl, w, h, rm, gm, bm); + } + +/* extern void shade_ximage_16_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */ + inline void shade_ximage_16_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm) + { + shade_ximage_16(data, bpl, w, h, rm, gm, bm); + } + +/* extern void shade_ximage_24_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */ + inline void shade_ximage_24_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm) + { + shade_ximage_24(data, bpl, w, h, rm, gm, bm); + } + + extern void shade_ximage_32_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); +/* inline void shade_ximage_32_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm) + { + shade_ximage_32(data, bpl, w, h, rm, gm, bm); + } */ +#endif #ifdef PIXMAP_SUPPORT static Imlib_Border bord_none = { 0, 0, 0, 0 }; @@ -1601,19 +1632,16 @@ } } else { for (y = h; --y >= 0;) { + int r, g, b; for (x = -w; x < 0; x++) { - int r, g, b; - b = ((DATA16 *) ptr)[x]; - r = (b & 0xf800) * rm; - g = (b & 0x7e0) * gm; - b = (b & 0x1f) * bm; - r |= (!(r >> 16) - 1); - g |= (!(g >> 11) - 1); - b |= (!(b >> 5) - 1); - ((DATA16 *) ptr)[x] = ((r >> 8) & 0xf800) - | ((g >> 8) & 0x7e0) - | ((b >> 8) & 0x1f); + r = ( (b >> 11 ) * rm ) >> 8; + r = ( r > 0x001f ) ? 0xf800 : ( r << 11 ); + g = (((b >> 5 ) & 0x003f ) * gm ) >> 8; + g = ( g > 0x003f ) ? 0x07e0 : ( g << 5 ); + b = (( b & 0x001f ) * bm ) >> 8; + b = ( b > 0x001f ) ? 0x001f : b; + ((DATA16 *) ptr)[x] = (r|g|b); } ptr += bpl; } @@ -1635,12 +1663,12 @@ int r, g, b; # ifdef WORDS_BIGENDIAN - r = (ptr[x + 1] * rm) >> 8; - g = (ptr[x + 2] * gm) >> 8; - b = (ptr[x + 3] * bm) >> 8; - ptr[x + 1] = r; - ptr[x + 2] = g; - ptr[x + 3] = b; + r = (ptr[x + 6] * rm) >> 8; + g = (ptr[x + 5] * gm) >> 8; + b = (ptr[x + 4] * bm) >> 8; + ptr[x + 6] = r; + ptr[x + 5] = g; + ptr[x + 4] = b; # else r = (ptr[x + 2] * rm) >> 8; g = (ptr[x + 1] * gm) >> 8; @@ -1658,9 +1686,9 @@ int r, g, b; # ifdef WORDS_BIGENDIAN - r = (ptr[x + 1] * rm) >> 8; - g = (ptr[x + 2] * gm) >> 8; - b = (ptr[x + 3] * bm) >> 8; + r = (ptr[x + 6] * rm) >> 8; + g = (ptr[x + 5] * gm) >> 8; + b = (ptr[x + 4] * bm) >> 8; # else r = (ptr[x + 2] * rm) >> 8; g = (ptr[x + 1] * gm) >> 8; @@ -1670,9 +1698,9 @@ g |= (!(g >> 8) - 1); b |= (!(b >> 8) - 1); # ifdef WORDS_BIGENDIAN - ptr[x + 1] = r; - ptr[x + 2] = g; - ptr[x + 3] = b; + ptr[x + 6] = r; + ptr[x + 5] = g; + ptr[x + 4] = b; # else ptr[x + 2] = r; ptr[x + 1] = g; @@ -1861,6 +1889,8 @@ case 15: #ifdef HAVE_MMX shade_ximage_15_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +//#elifdef HAVE_MMX_64 +// shade_ximage_15_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #else shade_ximage_15(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #endif @@ -1868,18 +1898,28 @@ case 16: #ifdef HAVE_MMX shade_ximage_16_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +//#elifdef HAVE_MMX_64 +// shade_ximage_16_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #else shade_ximage_16(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #endif break; case 24: if (ximg->bits_per_pixel != 32) { +#ifdef HAVE_MMX + shade_ximage_24_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +//#elifdef HAVE_MMX_64 +// shade_ximage_24_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +#else shade_ximage_24(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +#endif } /* drop */ case 32: #ifdef HAVE_MMX shade_ximage_32_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); +//#elifdef HAVE_MMX_64 +// shade_ximage_32_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #else shade_ximage_32(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm); #endif