While hacking away on Eterm I noticed that when X is in 16 bpp mode and
Eterm is invoked with the option: --cmod 0x100 everything is great.
When cmod < 0x100 everything is good.  When cmod > 0x100 the Eterm
window gets trashed.  

Would someone please confirm this and if that happens to be the case
then the attached patch will correct the problem.  Notice that in the
patch I moved the local integers r, g, and b outside of the inner loop.
This will prevent the compiler from creating room on the stack for them
each time the inner loop starts (for every line).  The instruction order
has been reworked in the spirit of my last optimization patches so that
the processor stays with a color as much as possible too.

My apologies for the patch not being as MeJ instructed:

cvs diff -Nu > Eterm-0.9.4-something_spiffy.patch

but that failed because of the state of disrepair in my Eterm tree and
because I don't have a CVSROOT defined.  Until I fix the first I can't
fix the second.  The 16bpp 64 bit MMX patch is failing to match the
output of the C code (even after this patch) but at least now I can
concentrate on the assembly code.

I suspect that the same problem is present in 15bpp mode as well but I
haven't gotten to that routine yet.

-- 
Tres
--- Eterm-0.9.3-orig/src/pixmap.c	2004-07-22 14:12:31.000000000 -0600
+++ Eterm-0.9.3/src/pixmap.c	2005-05-06 08:30:20.000000000 -0600
@@ -53,9 +53,40 @@
 #include "windows.h"
 
 /* Assembler routines */
-extern void shade_ximage_15_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
-extern void shade_ximage_16_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
-extern void shade_ximage_32_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+/* We don't want these defined if they are not being used. We want them to fail to find the bug */
+#ifdef HAVE_MMX
+  extern void shade_ximage_15_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+  extern void shade_ximage_16_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+  extern void shade_ximage_24_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+  extern void shade_ximage_32_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+#elif 0
+//#elifdef HAVE_MMX_64
+/* these inlines will be supplanted as each asm_mmx_64 function becomes available */
+
+/*  extern void shade_ximage_15_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */
+  inline void shade_ximage_15_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm)
+  {
+    shade_ximage_15_mmx_64(data, bpl, w, h, rm, gm, bm);
+  }
+
+/*  extern void shade_ximage_16_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */
+  inline void shade_ximage_16_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm)
+  {
+    shade_ximage_16(data, bpl, w, h, rm, gm, bm);
+  }
+
+/*  extern void shade_ximage_24_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm); */
+  inline void shade_ximage_24_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm)
+  {
+    shade_ximage_24(data, bpl, w, h, rm, gm, bm);
+  }
+
+  extern void shade_ximage_32_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm);
+/*  inline void shade_ximage_32_mmx_64(void *data, int bpl, int w, int h, int rm, int gm, int bm)
+  {
+    shade_ximage_32(data, bpl, w, h, rm, gm, bm);
+  } */
+#endif
 
 #ifdef PIXMAP_SUPPORT
 static Imlib_Border bord_none = { 0, 0, 0, 0 };
@@ -1601,19 +1632,16 @@
         }
     } else {
         for (y = h; --y >= 0;) {
+            int r, g, b;
             for (x = -w; x < 0; x++) {
-                int r, g, b;
-
                 b = ((DATA16 *) ptr)[x];
-                r = (b & 0xf800) * rm;
-                g = (b & 0x7e0) * gm;
-                b = (b & 0x1f) * bm;
-                r |= (!(r >> 16) - 1);
-                g |= (!(g >> 11) - 1);
-                b |= (!(b >> 5) - 1);
-                ((DATA16 *) ptr)[x] = ((r >> 8) & 0xf800)
-                    | ((g >> 8) & 0x7e0)
-                    | ((b >> 8) & 0x1f);
+                r = ( (b >> 11 )            * rm ) >> 8;
+		r = ( r > 0x001f ) ? 0xf800 : ( r << 11 );
+                g = (((b >>  5 ) & 0x003f ) * gm ) >> 8;
+		g = ( g > 0x003f ) ? 0x07e0 : ( g << 5 );
+                b = (( b         & 0x001f ) * bm ) >> 8;
+		b = ( b > 0x001f ) ? 0x001f : b;
+                ((DATA16 *) ptr)[x] = (r|g|b);
             }
             ptr += bpl;
         }
@@ -1635,12 +1663,12 @@
                 int r, g, b;
 
 # ifdef WORDS_BIGENDIAN
-                r = (ptr[x + 1] * rm) >> 8;
-                g = (ptr[x + 2] * gm) >> 8;
-                b = (ptr[x + 3] * bm) >> 8;
-                ptr[x + 1] = r;
-                ptr[x + 2] = g;
-                ptr[x + 3] = b;
+                r = (ptr[x + 6] * rm) >> 8;
+                g = (ptr[x + 5] * gm) >> 8;
+                b = (ptr[x + 4] * bm) >> 8;
+                ptr[x + 6] = r;
+                ptr[x + 5] = g;
+                ptr[x + 4] = b;
 # else
                 r = (ptr[x + 2] * rm) >> 8;
                 g = (ptr[x + 1] * gm) >> 8;
@@ -1658,9 +1686,9 @@
                 int r, g, b;
 
 # ifdef WORDS_BIGENDIAN
-                r = (ptr[x + 1] * rm) >> 8;
-                g = (ptr[x + 2] * gm) >> 8;
-                b = (ptr[x + 3] * bm) >> 8;
+                r = (ptr[x + 6] * rm) >> 8;
+                g = (ptr[x + 5] * gm) >> 8;
+                b = (ptr[x + 4] * bm) >> 8;
 # else
                 r = (ptr[x + 2] * rm) >> 8;
                 g = (ptr[x + 1] * gm) >> 8;
@@ -1670,9 +1698,9 @@
                 g |= (!(g >> 8) - 1);
                 b |= (!(b >> 8) - 1);
 # ifdef WORDS_BIGENDIAN
-                ptr[x + 1] = r;
-                ptr[x + 2] = g;
-                ptr[x + 3] = b;
+                ptr[x + 6] = r;
+                ptr[x + 5] = g;
+                ptr[x + 4] = b;
 # else
                 ptr[x + 2] = r;
                 ptr[x + 1] = g;
@@ -1861,6 +1889,8 @@
           case 15:
 #ifdef HAVE_MMX
               shade_ximage_15_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+//#elifdef HAVE_MMX_64
+//              shade_ximage_15_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #else
               shade_ximage_15(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #endif
@@ -1868,18 +1898,28 @@
           case 16:
 #ifdef HAVE_MMX
               shade_ximage_16_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+//#elifdef HAVE_MMX_64
+//              shade_ximage_16_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #else
               shade_ximage_16(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #endif
               break;
           case 24:
               if (ximg->bits_per_pixel != 32) {
+#ifdef HAVE_MMX
+                  shade_ximage_24_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+//#elifdef HAVE_MMX_64
+//                  shade_ximage_24_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+#else
                   shade_ximage_24(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+#endif
               }
               /* drop */
           case 32:
 #ifdef HAVE_MMX
               shade_ximage_32_mmx(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
+//#elifdef HAVE_MMX_64
+//              shade_ximage_32_mmx_64(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #else
               shade_ximage_32(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
 #endif

Reply via email to