color: various fixes to the SSE/3DNow code

Please disregard the previous patch; this one fixes all the issues I
listed as errors.

Batched fixes:
- Fixed gint/glong usage in all transform code
  This fixes a segfault on amd64
- Moved the femms/emms instructions so they run before each littleCMS
  call. LittleCMS may use the FPU, so the FPU context must be valid then.
- Removed two redundant "femms" instructions in the 3DNow code
- Make sure SSE/MMX registers that we expect to be constant are restored
  after a littleCMS call.

diff --git a/src/rs-color-transform.c b/src/rs-color-transform.c
--- a/src/rs-color-transform.c
+++ b/src/rs-color-transform.c
@@ -461,7 +461,7 @@
 
 COLOR_TRANSFORM(transform_nocms8_sse)
 {
-       register glong r,g,b;
+       register gint r,g,b;
        gint destoffset;
        gint col;
        RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 16);
@@ -484,6 +484,9 @@
        mat[10] = rct->priv->color_matrix.coeff[2][2];
        mat[11] = 0.f;
 
+       /* Initialize some invariant mmx/sse registers, as no external calls will
+        * be done, we can save a few cycles getting this init out of the row
+        * loop */
        asm volatile (
                "movups (%2), %%xmm2\n\t" /* rs->pre_mul */
                "movaps (%0), %%xmm3\n\t" /* matrix */
@@ -553,7 +556,10 @@
                        s += 4;
                }
        }
+
+       /* restore FPU context for caller */
        asm volatile("emms\n\t");
+
        return;
 }
 
@@ -561,7 +567,7 @@
 {
        gint destoffset;
        gint col;
-       register glong r=0,g=0,b=0;
+       register gint r=0,g=0,b=0;
        RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 8);
 
        if ((rct==NULL) || (width<1) || (height<1) || (in == NULL) || 
(in_rowstride<8) || (out == NULL) || (out_rowstride<1))
@@ -582,8 +588,9 @@
        mat[10] = rct->priv->color_matrix.coeff[2][2];
        mat[11] = 0.f;
 
+       /* Initialize some invariant mmx registers, as no external calls will be
+        * done, we can save a few cycles getting this init out of the row loop */
        asm volatile (
-               "femms\n\t"
                "pxor %%mm7, %%mm7\n\t" /* 0x0 */
                "movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
                "movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
@@ -591,6 +598,7 @@
                :
                : "r" (rct->priv->pre_mul), "r" (&top[0])
        );
+
        while(height--)
        {
                destoffset = 0;
@@ -659,6 +667,8 @@
                        d[destoffset++] = rct->priv->table8[b];
                }
        }
+
+       /* Restore FPU context for the caller */
        asm volatile ("femms\n\t");
 
        return;
@@ -667,7 +677,7 @@
 COLOR_TRANSFORM(transform_cms8_sse)
 {
        gushort *buffer = g_malloc(width*3*sizeof(gushort));
-       register glong r,g,b;
+       register gint r,g,b;
        gint destoffset;
        gint col;
        RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 16);
@@ -690,22 +700,26 @@
        mat[10] = rct->priv->color_matrix.coeff[2][2];
        mat[11] = 0.f;
 
-       asm volatile (
-               "movups (%2), %%xmm2\n\t" /* rs->pre_mul */
-               "movaps (%0), %%xmm3\n\t" /* matrix */
-               "movaps 16(%0), %%xmm4\n\t"
-               "movaps 32(%0), %%xmm5\n\t"
-               "movaps (%1), %%xmm6\n\t" /* top */
-               "pxor %%mm7, %%mm7\n\t" /* 0x0 */
-               :
-               : "r" (&mat[0]), "r" (&top[0]), "r" (rct->priv->pre_mul)
-               : "memory"
-       );
        while(height--)
        {
                destoffset = 0;
                col = width;
                gushort *s = in + height * in_rowstride;
+
+               /* Restore our invariant sse/mmx registers, littlecms may have used
+                * them in the previous row */
+               asm volatile (
+                       "movups (%2), %%xmm2\n\t" /* rs->pre_mul */
+                       "movaps (%0), %%xmm3\n\t" /* matrix */
+                       "movaps 16(%0), %%xmm4\n\t"
+                       "movaps 32(%0), %%xmm5\n\t"
+                       "movaps (%1), %%xmm6\n\t" /* top */
+                       "pxor %%mm7, %%mm7\n\t" /* 0x0 */
+                       :
+                       : "r" (&mat[0]), "r" (&top[0]), "r" (rct->priv->pre_mul)
+                       : "memory"
+               );
+
                while(col--)
                {
                        asm volatile (
@@ -755,9 +769,12 @@
                        buffer[destoffset++] = rct->priv->table16[b];
                        s += 4;
                }
+
+               /* Restore FPU context for littleCMS */
+               asm volatile("emms\n\t");
+
                cmsDoTransform((cmsHPROFILE) rct->priv->transform, buffer, 
out+height * out_rowstride, width);
        }
-       asm volatile("emms\n\t");
        g_free(buffer);
        return;
 }
@@ -767,7 +784,7 @@
        gushort *buffer = g_malloc(width*3*sizeof(gushort));
        gint destoffset;
        gint col;
-       register glong r=0,g=0,b=0;
+       register gint r=0,g=0,b=0;
        RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 8);
 
        if ((rct==NULL) || (width<1) || (height<1) || (in == NULL) || 
(in_rowstride<8) || (out == NULL) || (out_rowstride<1))
@@ -788,20 +805,23 @@
        mat[10] = rct->priv->color_matrix.coeff[2][2];
        mat[11] = 0.f;
 
-       asm volatile (
-               "femms\n\t"
-               "pxor %%mm7, %%mm7\n\t" /* 0x0 */
-               "movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
-               "movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
-               "movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
-               :
-               : "r" (rct->priv->pre_mul), "r" (&top[0])
-       );
        while(height--)
        {
                destoffset = 0;
                col = width;
                gushort *s = in + height * in_rowstride;
+
+               /* Restore our invariant mmx registers, littlecms may have used them
+                * in the previous row */
+               asm volatile (
+                       "pxor %%mm7, %%mm7\n\t" /* 0x0 */
+                       "movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
+                       "movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
+                       "movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
+                       :
+                       : "r" (rct->priv->pre_mul), "r" (&top[0])
+               );
+
                while(col--)
                {
                        asm volatile (
@@ -863,9 +883,12 @@
                        buffer[destoffset++] = rct->priv->table16[g];
                        buffer[destoffset++] = rct->priv->table16[b];
                }
+
+               /* Restore the FPU context for littleCMS */
+               asm volatile ("emms\n\t");
+
                cmsDoTransform((cmsHPROFILE) rct->priv->transform, buffer, 
out+height * out_rowstride, width);
        }
-       asm volatile ("femms\n\t");
        g_free(buffer);
        return;
 }

_______________________________________________
Rawstudio-dev mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-dev

Reply via email to