color: various fixes to the SSE/3DNow code
Please disregard the previous patch; this one fixes all of the issues I
listed as errors.
Batched fixes:
- Fixed gint/glong usage in all transform code
This fixes a segfault on amd64
- Moved femms/emms instructions before calling littleCMS. LittleCMS may
use the FPU, so we have to make sure the FPU context is valid first.
- Removed two unnecessary "femms" instructions in the 3DNow code
- Make sure SSE/MMX registers that we expect to be constant are restored
after a littleCMS call.
diff --git a/src/rs-color-transform.c b/src/rs-color-transform.c
--- a/src/rs-color-transform.c
+++ b/src/rs-color-transform.c
@@ -461,7 +461,7 @@
COLOR_TRANSFORM(transform_nocms8_sse)
{
- register glong r,g,b;
+ register gint r,g,b;
gint destoffset;
gint col;
RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 16);
@@ -484,6 +484,9 @@
mat[10] = rct->priv->color_matrix.coeff[2][2];
mat[11] = 0.f;
+ /* Initialize some invariant mmx/sse registers; as no external calls will
+ * be done, we can save a few cycles by moving this init out of the row
+ * loop */
asm volatile (
"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
"movaps (%0), %%xmm3\n\t" /* matrix */
@@ -553,7 +556,10 @@
s += 4;
}
}
+
+ /* restore FPU context for caller */
asm volatile("emms\n\t");
+
return;
}
@@ -561,7 +567,7 @@
{
gint destoffset;
gint col;
- register glong r=0,g=0,b=0;
+ register gint r=0,g=0,b=0;
RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 8);
if ((rct==NULL) || (width<1) || (height<1) || (in == NULL) ||
(in_rowstride<8) || (out == NULL) || (out_rowstride<1))
@@ -582,8 +588,9 @@
mat[10] = rct->priv->color_matrix.coeff[2][2];
mat[11] = 0.f;
+ /* Initialize some invariant mmx registers; as no external calls will be
+ * done, we can save a few cycles by moving this init out of the row loop */
asm volatile (
- "femms\n\t"
"pxor %%mm7, %%mm7\n\t" /* 0x0 */
"movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
"movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
@@ -591,6 +598,7 @@
:
: "r" (rct->priv->pre_mul), "r" (&top[0])
);
+
while(height--)
{
destoffset = 0;
@@ -659,6 +667,8 @@
d[destoffset++] = rct->priv->table8[b];
}
}
+
+ /* Restore FPU context for the caller */
asm volatile ("femms\n\t");
return;
@@ -667,7 +677,7 @@
COLOR_TRANSFORM(transform_cms8_sse)
{
gushort *buffer = g_malloc(width*3*sizeof(gushort));
- register glong r,g,b;
+ register gint r,g,b;
gint destoffset;
gint col;
RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 16);
@@ -690,22 +700,26 @@
mat[10] = rct->priv->color_matrix.coeff[2][2];
mat[11] = 0.f;
- asm volatile (
- "movups (%2), %%xmm2\n\t" /* rs->pre_mul */
- "movaps (%0), %%xmm3\n\t" /* matrix */
- "movaps 16(%0), %%xmm4\n\t"
- "movaps 32(%0), %%xmm5\n\t"
- "movaps (%1), %%xmm6\n\t" /* top */
- "pxor %%mm7, %%mm7\n\t" /* 0x0 */
- :
- : "r" (&mat[0]), "r" (&top[0]), "r" (rct->priv->pre_mul)
- : "memory"
- );
while(height--)
{
destoffset = 0;
col = width;
gushort *s = in + height * in_rowstride;
+
+ /* Restore our invariant sse/mmx registers, littlecms may have used
+ * them in the previous row */
+ asm volatile (
+ "movups (%2), %%xmm2\n\t" /* rs->pre_mul */
+ "movaps (%0), %%xmm3\n\t" /* matrix */
+ "movaps 16(%0), %%xmm4\n\t"
+ "movaps 32(%0), %%xmm5\n\t"
+ "movaps (%1), %%xmm6\n\t" /* top */
+ "pxor %%mm7, %%mm7\n\t" /* 0x0 */
+ :
+ : "r" (&mat[0]), "r" (&top[0]), "r" (rct->priv->pre_mul)
+ : "memory"
+ );
+
while(col--)
{
asm volatile (
@@ -755,9 +769,12 @@
buffer[destoffset++] = rct->priv->table16[b];
s += 4;
}
+
+ /* Restore FPU context for littleCMS */
+ asm volatile("emms\n\t");
+
cmsDoTransform((cmsHPROFILE) rct->priv->transform, buffer,
out+height * out_rowstride, width);
}
- asm volatile("emms\n\t");
g_free(buffer);
return;
}
@@ -767,7 +784,7 @@
gushort *buffer = g_malloc(width*3*sizeof(gushort));
gint destoffset;
gint col;
- register glong r=0,g=0,b=0;
+ register gint r=0,g=0,b=0;
RS_DECLARE_ALIGNED(gfloat, mat, 4, 3, 8);
if ((rct==NULL) || (width<1) || (height<1) || (in == NULL) ||
(in_rowstride<8) || (out == NULL) || (out_rowstride<1))
@@ -788,20 +805,23 @@
mat[10] = rct->priv->color_matrix.coeff[2][2];
mat[11] = 0.f;
- asm volatile (
- "femms\n\t"
- "pxor %%mm7, %%mm7\n\t" /* 0x0 */
- "movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
- "movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
- "movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
- :
- : "r" (rct->priv->pre_mul), "r" (&top[0])
- );
while(height--)
{
destoffset = 0;
col = width;
gushort *s = in + height * in_rowstride;
+
+ /* Restore our invariant mmx registers, littlecms may have used them
+ * in the previous row */
+ asm volatile (
+ "pxor %%mm7, %%mm7\n\t" /* 0x0 */
+ "movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
+ "movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
+ "movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
+ :
+ : "r" (rct->priv->pre_mul), "r" (&top[0])
+ );
+
while(col--)
{
asm volatile (
@@ -863,9 +883,12 @@
buffer[destoffset++] = rct->priv->table16[g];
buffer[destoffset++] = rct->priv->table16[b];
}
+
+ /* Restore the FPU context for littleCMS */
+ asm volatile ("emms\n\t");
+
cmsDoTransform((cmsHPROFILE) rct->priv->transform, buffer,
out+height * out_rowstride, width);
}
- asm volatile ("femms\n\t");
g_free(buffer);
return;
}
_______________________________________________
Rawstudio-dev mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-dev