# HG changeset patch
# User Edouard Gomez <[EMAIL PROTECTED]>
# Date 1188337644 -7200
# Node ID ee1c11bb789434408be2d0e47cc99888b1362e89
# Parent d7f6afbf0d105cc9e5bbea2f38f5bf04c204a7e2
Use the curve as a luminance curve only
diff -r d7f6afbf0d10 -r ee1c11bb7894 src/rs-color-transform.c
--- a/src/rs-color-transform.c Tue Aug 28 23:47:24 2007 +0200
+++ b/src/rs-color-transform.c Tue Aug 28 23:47:24 2007 +0200
@@ -25,6 +25,25 @@ static void make_tables(RS_COLOR_TRANSFO
static void make_tables(RS_COLOR_TRANSFORM *rct);
static gboolean select_render(RS_COLOR_TRANSFORM *rct);
+/* Until I find out why the 3dnow output doesn't look like the c/float/sse
+ * output, I keep this handy */
+#define USE_3DNOW_LUMINANCE_CURVE
+
+/* The XYZ tri stimulus luminance coefficients */
+#define LUM_XYZ_R (0.212671f)
+#define LUM_XYZ_G (0.715160f)
+#define LUM_XYZ_B (0.072169f)
+
+/* Fixed precision version for better performance.
+ * Cannot be >13 or the code overflows in some places,
+ * and above 15 it can generate segfaults (SIGSEGV) */
+#define LUM_PRECISION 13
+#define LUM_FIXED(a) ((gint)((a)*(1<<LUM_PRECISION)))
+#define LUM_FIXED_HALFONE LUM_FIXED(0.5f)
+#define LUM_FIXED_XYZ_R LUM_FIXED(LUM_XYZ_R)
+#define LUM_FIXED_XYZ_G LUM_FIXED(LUM_XYZ_G)
+#define LUM_FIXED_XYZ_B LUM_FIXED(LUM_XYZ_B)
+
/* Function pointers - initialized by arch binders */
COLOR_TRANSFORM(*transform_nocms8);
COLOR_TRANSFORM(*transform_cms8);
@@ -50,6 +69,8 @@ struct _RS_COLOR_TRANSFORM_PRIVATE {
gint nknots;
gfloat *knots;
gfloat curve_samples[65536];
+ guint luminance_fixed[65536];
+ gfloat luminance_float[65535];
void *transform;
};
@@ -308,8 +329,48 @@ make_tables(RS_COLOR_TRANSFORM *rct)
nd = ((gdouble) n) * rec65535;
nd = pow(nd, gammavalue);
- if (likely(rct->priv->curve_samples))
- nd = (gdouble) rct->priv->curve_samples[((gint)
(nd*65535.0f))];
+ /* The idea is to use the curve to boost/decrease
+ * luminance only.
+ * So we have to compute Y, map it to its new value
+ * and then compute a factor that would keep this
+ * new luminance if applied to the RGB triplet.
+ *
+ * Quite straightforward; let's go through it quickly...
+ * Y = a.R + b.G + c.B
+ *
+ * We map Y to Y' according to the curve:
+ * Y' = curve(Y)
+ *
+ * Let's compute a real number 'd' such that:
+ * Y' = d.Y
+ *
+ * Then we have
+ * Y' = d.(a.R + b.G + c.B)
+ * or written a bit differently:
+ * Y' = a.(d.R) + b.(d.G) + c.(d.B)
+ *
+ * So the RGB triplet we are looking for is (d.R, d.G, d.B)
+ *
+ * The luminance LUTs store curve(Y)/Y as floating point and fixed
+ * point values, so the RGB vector can be multiplied directly by the
+ * looked-up factor without any division except the ones done
+ * when generating the table. That amounts to 65536 divs, which
+ * is equivalent to a 256x256 picture. So computing the
+ * LUTs saves divs in most cases (is there any raw smaller than
+ * 2048x2048 ? :-))
+ *
+ * NB: luminance curve is applied in linear space before
+ * any gamma */
+ if (likely(rct->priv->curve_samples)) {
+ gfloat Yp;
+ gfloat Y;
+
+ Y = (n > 0) ? (gfloat)n : 1.f;
+ Yp = rct->priv->curve_samples[n];
+ Yp = Yp < 0.f ? 0.f : (Yp > 1.f) ? 1.f : Yp;
+ rct->priv->luminance_float[n] = Yp*65536.f/Y;
+ rct->priv->luminance_fixed[n] =
LUM_FIXED(rct->priv->luminance_float[n]);
+ }
nd = nd*contrast+postadd;
@@ -431,6 +492,8 @@ COLOR_TRANSFORM(transform_nocms_float)
srcoffset = y * in_rowstride;
for(x=0 ; x<width ; x++)
{
+ gfloat Y;
+
/* pre multipliers */
r1 = in[srcoffset+R] * rct->priv->pre_mul[R];
g1 = in[srcoffset+G] * rct->priv->pre_mul[G];
@@ -456,6 +519,18 @@ COLOR_TRANSFORM(transform_nocms_float)
r = r2;
g = g2;
b = b2;
+
+ /* clamp to unsigned short */
+ _CLAMP65535_TRIPLET(r,g,b);
+
+ // Compute luminance and fetch the factor curve[Y]/Y from the LUT
+ Y = LUM_XYZ_R*r + LUM_XYZ_G*g + LUM_XYZ_B*b + 0.5f;
+ Y =
rct->priv->luminance_float[(Y<0.f)?0:(Y>65535.f)?65535:(int)Y];
+
+ // Scale RGB according to that factor
+ r = (int)((gfloat)r*Y);
+ g = (int)((gfloat)g*Y);
+ b = (int)((gfloat)b*Y);
/* clamp to unsigned short */
_CLAMP65535_TRIPLET(r,g,b);
@@ -490,24 +565,18 @@ COLOR_TRANSFORM(transform_nocms8_sse)
gint col;
gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0};
gfloat mat[12] align(16) = {
+ LUM_XYZ_R,
rct->priv->color_matrix.coeff[0][0],
rct->priv->color_matrix.coeff[1][0],
rct->priv->color_matrix.coeff[2][0],
- RLUM * (rct->priv->color_matrix.coeff[0][0]
- + rct->priv->color_matrix.coeff[0][1]
- + rct->priv->color_matrix.coeff[0][2]),
+ LUM_XYZ_G,
rct->priv->color_matrix.coeff[0][1],
rct->priv->color_matrix.coeff[1][1],
rct->priv->color_matrix.coeff[2][1],
- GLUM * (rct->priv->color_matrix.coeff[1][0]
- + rct->priv->color_matrix.coeff[1][1]
- + rct->priv->color_matrix.coeff[1][2]),
+ LUM_XYZ_B,
rct->priv->color_matrix.coeff[0][2],
rct->priv->color_matrix.coeff[1][2],
rct->priv->color_matrix.coeff[2][2],
- BLUM * (rct->priv->color_matrix.coeff[2][0]
- + rct->priv->color_matrix.coeff[2][1]
- + rct->priv->color_matrix.coeff[2][2])
};
asm volatile (
"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
@@ -535,7 +604,7 @@ COLOR_TRANSFORM(transform_nocms8_sse)
"punpcklwd %%mm7, %%mm0\n\t" /* R | G */
"punpckhwd %%mm7, %%mm1\n\t" /* B | G2 */
"cvtpi2ps %%mm1, %%xmm0\n\t" /* B | G2 | ? | ?
*/
- "shufps $0x4E, %%xmm0, %%xmm0\n\t" /* ? | ? | B
| G2 */
+ "shufps $0x4e, %%xmm0, %%xmm0\n\t" /* ? | ? | B
| G2 */
"cvtpi2ps %%mm0, %%xmm0\n\t" /* R | G | B | G2
*/
"mulps %%xmm2, %%xmm0\n\t" /* (R | G | B | _) *
premul */
@@ -553,7 +622,7 @@ COLOR_TRANSFORM(transform_nocms8_sse)
"addps %%xmm1, %%xmm7\n\t"
"movaps %%xmm0, %%xmm1\n\t"
- "shufps $0xAA, %%xmm1, %%xmm1\n\t" /* B | B | B
| B */
+ "shufps $0xaa, %%xmm1, %%xmm1\n\t" /* B | B | B
| B */
"mulps %%xmm5, %%xmm1\n\t"
"addps %%xmm7, %%xmm1\n\t"
@@ -561,16 +630,22 @@ COLOR_TRANSFORM(transform_nocms8_sse)
"minps %%xmm6, %%xmm1\n\t" /* MIN (65535.0, in)
*/
"maxps %%xmm7, %%xmm1\n\t" /* MAX (0.0, in) */
- /* xmm1: R | G | B | _ */
-// "shufps $0xFF, %%xmm1, %%xmm1\n\t"
- "cvtss2si %%xmm1, %0\n\t"
- "shufps $0xF9, %%xmm1, %%xmm1\n\t" /* xmm1: G |
B | _ | _ */
- "cvtss2si %%xmm1, %1\n\t"
- "shufps $0xF9, %%xmm1, %%xmm1\n\t" /* xmm1: B |
_ | _ | _ */
- "cvtss2si %%xmm1, %2\n\t"
+ /* xmm1: Y | R | G | B */
+ "cvtss2si %%xmm1, %%"REG_a"\n\t" /* Extract
Luminance integer value */
+ "movss (%4, %%"REG_a", 4), %%xmm0\n\t" /* xmm0
= Y'/Y | _ | _ | _ */
+ "shufps $0, %%xmm0, %%xmm0\n\t" /* xmm0 = Y'/Y
| Y'/Y | Y'/Y | Y'/Y */
+ "mulps %%xmm0, %%xmm1\n\t" /* xmm1 = Y' | R' |
G' | B' */
+ "minps %%xmm6, %%xmm1\n\t" /* MIN (65535.0, in)
*/
+ "maxps %%xmm7, %%xmm1\n\t" /* MAX (0.0, in) */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = R'
| G' | B' | _ */
+ "cvtss2si %%xmm1, %0\n\t" /* Extract R' */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = G'
| B' | _ | _ */
+ "cvtss2si %%xmm1, %1\n\t" /* Extract G' */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = B'
| _ | _ | _ */
+ "cvtss2si %%xmm1, %2\n\t" /* Extract B' */
: "=r" (r), "=r" (g), "=r" (b)
- : "r" (s)
- : "memory"
+ : "r" (s), "r" (rct->priv->luminance_float)
+ : "%"REG_a, "memory"
);
d[destoffset++] = rct->priv->table8[r];
d[destoffset++] = rct->priv->table8[g];
@@ -586,23 +661,29 @@ COLOR_TRANSFORM(transform_nocms8_3dnow)
{
gint destoffset;
gint col;
- register glong r=0,g=0,b=0;
- gfloat mat[12] align(8);
- gfloat top[2] align(8);
- mat[0] = rct->priv->color_matrix.coeff[0][0];
- mat[1] = rct->priv->color_matrix.coeff[0][1];
- mat[2] = rct->priv->color_matrix.coeff[0][2];
- mat[3] = 0.0;
- mat[4] = rct->priv->color_matrix.coeff[1][0];
- mat[5] = rct->priv->color_matrix.coeff[1][1];
- mat[6] = rct->priv->color_matrix.coeff[1][2];
- mat[7] = 0.0;
- mat[8] = rct->priv->color_matrix.coeff[2][0];
- mat[9] = rct->priv->color_matrix.coeff[2][1];
- mat[10] = rct->priv->color_matrix.coeff[2][2];
- mat[11] = 0.0;
- top[0] = 65535.0;
- top[1] = 65535.0;
+ gint rgbx[4] align(8) = {0, 0, 0, 0};
+ gfloat tmp[4] align(8) = { 0.f, 0.f, 0.f, 0.f};
+ gfloat mat[16] align(8) = {
+ rct->priv->color_matrix.coeff[0][0],
+ rct->priv->color_matrix.coeff[0][1],
+ rct->priv->color_matrix.coeff[0][2],
+ 0.f,
+ rct->priv->color_matrix.coeff[1][0],
+ rct->priv->color_matrix.coeff[1][1],
+ rct->priv->color_matrix.coeff[1][2],
+ 0.f,
+ rct->priv->color_matrix.coeff[2][0],
+ rct->priv->color_matrix.coeff[2][1],
+ rct->priv->color_matrix.coeff[2][2],
+ 0.f,
+ LUM_XYZ_R,
+ LUM_XYZ_G,
+ LUM_XYZ_B,
+ 0.f
+ };
+ const gfloat top[2] align(8) = { 65535.f, 65535.f};
+ const gfloat halfone[2] align(8) = { 0.5f, 0.5f};
+
asm volatile (
"femms\n\t"
"pxor %%mm7, %%mm7\n\t" /* 0x0 */
@@ -623,7 +704,7 @@ COLOR_TRANSFORM(transform_nocms8_3dnow)
asm volatile (
/* pre multiply */
"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
- "movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+ "movq (%0), %%mm1\n\t" /* R | G | B | G2 */
"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
"pi2fd %%mm0, %%mm0\n\t" /* to float */
@@ -635,49 +716,97 @@ COLOR_TRANSFORM(transform_nocms8_3dnow)
"pfmax %%mm7, %%mm0\n\t"
"pfmax %%mm7, %%mm1\n\t"
+ "movq (%5), %%mm3\n\t" /* mm3 = halfone */
"add $8, %0\n\t" /* increment offset */
/* red */
- "movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
- "movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
+ "movq (%1), %%mm4\n\t" /* mat[0] | mat[1] */
+ "movq 8(%1), %%mm5\n\t" /* mat[2] | mat[3] */
"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] |
G*[1] + G2*[3] */
"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] +
G*[1] + G2*[3] | ? */
"pfmin %%mm6, %%mm5\n\t"
"pfmax %%mm7, %%mm5\n\t"
- "pf2id %%mm5, %%mm5\n\t" /* to integer */
- "movd %%mm5, %1\n\t" /* write r */
+ "movd %%mm5, (%3)\n\t" /* write to tmp[0] */
/* green */
- "movq 16(%4), %%mm4\n\t"
- "movq 24(%4), %%mm5\n\t"
+ "movq 16(%1), %%mm4\n\t"
+ "movq 24(%1), %%mm5\n\t"
"pfmul %%mm0, %%mm4\n\t"
"pfmul %%mm1, %%mm5\n\t"
"pfadd %%mm4, %%mm5\n\t"
"pfacc %%mm5, %%mm5\n\t"
"pfmin %%mm6, %%mm5\n\t"
"pfmax %%mm7, %%mm5\n\t"
- "pf2id %%mm5, %%mm5\n\t"
- "movd %%mm5, %2\n\t"
+ "movd %%mm5, 4(%3)\n\t" /* write to tmp[1] */
/* blue */
- "movq 32(%4), %%mm4\n\t"
- "movq 40(%4), %%mm5\n\t"
+ "movq 32(%1), %%mm4\n\t"
+ "movq 40(%1), %%mm5\n\t"
"pfmul %%mm0, %%mm4\n\t"
"pfmul %%mm1, %%mm5\n\t"
"pfadd %%mm4, %%mm5\n\t"
"pfacc %%mm5, %%mm5\n\t"
"pfmin %%mm6, %%mm5\n\t"
"pfmax %%mm7, %%mm5\n\t"
+ "movd %%mm5, 8(%3)\n\t" /* write to tmp[2] */
+
+#ifdef USE_3DNOW_LUMINANCE_CURVE
+ /* Luminance */
+ "movq 48(%1), %%mm4\n\t"
+ "movq 56(%1), %%mm5\n\t"
+ "pfmul %%mm0, %%mm4\n\t"
+ "pfmul %%mm1, %%mm5\n\t"
+ "pfadd %%mm4, %%mm5\n\t"
+ "pfacc %%mm5, %%mm5\n\t"
+ "pfmin %%mm6, %%mm5\n\t"
+ "pfmax %%mm7, %%mm5\n\t"
+ "pfadd %%mm3, %%mm5\n\t" /* +0.5 to have real
round */
"pf2id %%mm5, %%mm5\n\t"
- "movd %%mm5, %3\n\t"
- : "+r" (s), "+r" (r), "+r" (g), "+r" (b)
- : "r" (&mat)
- );
- d[destoffset++] = rct->priv->table8[r];
- d[destoffset++] = rct->priv->table8[g];
- d[destoffset++] = rct->priv->table8[b];
+ "movd %%mm5, %%eax\n\t" /* write (int)Y to eax
*/
+
+ /* Compute luminance adjusted pixel */
+ "movq (%3), %%mm0\n\t" /* R, G */
+ "movq 8(%3), %%mm1\n\t" /* B, 0 */
+ "movd (%2, %%"REG_a", 4), %%mm2\n\t" /* mm2 = x
| a */
+ "pshufw $0x44, %%mm2, %%mm2\n\t" /* mm2 = a | a
*/
+ "pfmul %%mm2, %%mm0\n\t" /* mm0 = aR | aG */
+ "pfmul %%mm2, %%mm1\n\t" /* mm1 = aB | 0 */
+ "pfmin %%mm6, %%mm0\n\t" /* mm0 = min(mm0,
65535.f) */
+ "pfmin %%mm6, %%mm1\n\t" /* mm1 = min(mm1,
65535.f) */
+ "pfmax %%mm7, %%mm0\n\t" /* mm0 = max(mm0, 0) */
+ "pfmax %%mm7, %%mm1\n\t" /* mm1 = max(mm1, 0) */
+ "pfadd %%mm3, %%mm0\n\t" /* +0.5 */
+ "pfadd %%mm3, %%mm1\n\t" /* +0.5 */
+ "pf2id %%mm0, %%mm0\n\t" /* mm0 = (int)aR |
(int)aG */
+ "pf2id %%mm1, %%mm1\n\t" /* mm1 = (int)aB | 0 */
+ "movq %%mm0, (%4)\n\t" /* write new R/G to rgbx
*/
+ "movq %%mm1, 8(%4)\n\t" /* write new B/0 to
rgbx */
+#else
+ "movq (%3), %%mm0\n\t"
+ "movq 8(%3), %%mm1\n\t"
+ "pfmin %%mm6, %%mm0\n\t"
+ "pfmin %%mm6, %%mm1\n\t"
+ "pfmax %%mm7, %%mm0\n\t"
+ "pfmax %%mm7, %%mm1\n\t"
+ "pfadd %%mm3, %%mm0\n\t"
+ "pfadd %%mm3, %%mm1\n\t"
+ "pf2id %%mm0, %%mm0\n\t"
+ "pf2id %%mm1, %%mm1\n\t"
+ "movq %%mm0, (%4)\n\t"
+ "movq %%mm1, 8(%4)\n\t"
+#endif
+ : "+r" (s)
+ : "r" (mat),
+ "r" (rct->priv->luminance_float),
+ "r" (tmp),
+ "r" (rgbx),
+ "r" (halfone)
+ : "%"REG_a, "memory");
+ d[destoffset++] = rct->priv->table8[rgbx[0]];
+ d[destoffset++] = rct->priv->table8[rgbx[1]];
+ d[destoffset++] = rct->priv->table8[rgbx[2]];
}
}
asm volatile ("femms\n\t");
@@ -693,18 +822,19 @@ COLOR_TRANSFORM(transform_cms8_sse)
gint col;
gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0};
gfloat mat[12] align(16) = {
+ LUM_XYZ_R,
rct->priv->color_matrix.coeff[0][0],
rct->priv->color_matrix.coeff[1][0],
rct->priv->color_matrix.coeff[2][0],
- 0.0,
+ LUM_XYZ_G,
rct->priv->color_matrix.coeff[0][1],
rct->priv->color_matrix.coeff[1][1],
rct->priv->color_matrix.coeff[2][1],
- 0.0,
+ LUM_XYZ_B,
rct->priv->color_matrix.coeff[0][2],
rct->priv->color_matrix.coeff[1][2],
rct->priv->color_matrix.coeff[2][2],
- 0.0 };
+ };
asm volatile (
"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
"movaps (%0), %%xmm3\n\t" /* matrix */
@@ -748,7 +878,7 @@ COLOR_TRANSFORM(transform_cms8_sse)
"addps %%xmm1, %%xmm7\n\t"
"movaps %%xmm0, %%xmm1\n\t"
- "shufps $0xAA, %%xmm1, %%xmm1\n\t"
+ "shufps $0xaa, %%xmm1, %%xmm1\n\t"
"mulps %%xmm5, %%xmm1\n\t"
"addps %%xmm7, %%xmm1\n\t"
@@ -756,14 +886,22 @@ COLOR_TRANSFORM(transform_cms8_sse)
"minps %%xmm6, %%xmm1\n\t"
"maxps %%xmm7, %%xmm1\n\t"
- "cvtss2si %%xmm1, %0\n\t"
- "shufps $0xF9, %%xmm1, %%xmm1\n\t"
- "cvtss2si %%xmm1, %1\n\t"
- "shufps $0xF9, %%xmm1, %%xmm1\n\t"
- "cvtss2si %%xmm1, %2\n\t"
+ /* xmm1: Y | R | G | B */
+ "cvtss2si %%xmm1, %%"REG_a"\n\t" /* Extract
Luminance integer value */
+ "movss (%4, %%"REG_a", 4), %%xmm0\n\t" /* xmm0
= Y'/Y | _ | _ | _ */
+ "shufps $0, %%xmm0, %%xmm0\n\t" /* xmm0 = Y'/Y
| Y'/Y | Y'/Y | Y'/Y */
+ "mulps %%xmm0, %%xmm1\n\t" /* xmm1 = Y' | R' |
G' | B' */
+ "minps %%xmm6, %%xmm1\n\t" /* MIN (65535.0, in)
*/
+ "maxps %%xmm7, %%xmm1\n\t" /* MAX (0.0, in) */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = R'
| G' | B' | _ */
+ "cvtss2si %%xmm1, %0\n\t" /* Extract R' */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = G'
| B' | _ | _ */
+ "cvtss2si %%xmm1, %1\n\t" /* Extract G' */
+ "shufps $0xf9, %%xmm1, %%xmm1\n\t" /* xmm1 = B'
| _ | _ | _ */
+ "cvtss2si %%xmm1, %2\n\t" /* Extract B' */
: "=r" (r), "=r" (g), "=r" (b)
- : "r" (s)
- : "memory"
+ : "r" (s), "r" (rct->priv->luminance_float)
+ : "%"REG_a, "memory"
);
buffer[destoffset++] = rct->priv->table16[r];
buffer[destoffset++] = rct->priv->table16[g];
@@ -782,23 +920,29 @@ COLOR_TRANSFORM(transform_cms8_3dnow)
gushort *buffer = g_malloc(width*3*sizeof(gushort));
gint destoffset;
gint col;
- register glong r=0,g=0,b=0;
- gfloat mat[12] align(8);
- gfloat top[2] align(8);
- mat[0] = rct->priv->color_matrix.coeff[0][0];
- mat[1] = rct->priv->color_matrix.coeff[0][1];
- mat[2] = rct->priv->color_matrix.coeff[0][2];
- mat[3] = 0.0;
- mat[4] = rct->priv->color_matrix.coeff[1][0];
- mat[5] = rct->priv->color_matrix.coeff[1][1];
- mat[6] = rct->priv->color_matrix.coeff[1][2];
- mat[7] = 0.0;
- mat[8] = rct->priv->color_matrix.coeff[2][0];
- mat[9] = rct->priv->color_matrix.coeff[2][1];
- mat[10] = rct->priv->color_matrix.coeff[2][2];
- mat[11] = 0.0;
- top[0] = 65535.0;
- top[1] = 65535.0;
+ gint rgbx[4] align(8) = { 0, 0, 0, 0};
+ gfloat tmp[4] align(8) = { 0.f, 0.f, 0.f, 0.f};
+ gfloat mat[16] align(8) = {
+ rct->priv->color_matrix.coeff[0][0],
+ rct->priv->color_matrix.coeff[0][1],
+ rct->priv->color_matrix.coeff[0][2],
+ 0.0,
+ rct->priv->color_matrix.coeff[1][0],
+ rct->priv->color_matrix.coeff[1][1],
+ rct->priv->color_matrix.coeff[1][2],
+ 0.0,
+ rct->priv->color_matrix.coeff[2][0],
+ rct->priv->color_matrix.coeff[2][1],
+ rct->priv->color_matrix.coeff[2][2],
+ 0.0,
+ LUM_XYZ_R,
+ LUM_XYZ_G,
+ LUM_XYZ_B,
+ 0.0
+ };
+ const gfloat top[2] align(8) = { 65535.f, 65535.f};
+ const gfloat halfone[2] align(8) = { .5f, .5f};
+
asm volatile (
"femms\n\t"
"pxor %%mm7, %%mm7\n\t" /* 0x0 */
@@ -818,7 +962,7 @@ COLOR_TRANSFORM(transform_cms8_3dnow)
asm volatile (
/* pre multiply */
"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
- "movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+ "movq (%0), %%mm1\n\t" /* R | G | B | G2 */
"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
"pi2fd %%mm0, %%mm0\n\t" /* to float */
@@ -830,23 +974,46 @@ COLOR_TRANSFORM(transform_cms8_3dnow)
"pfmax %%mm7, %%mm0\n\t"
"pfmax %%mm7, %%mm1\n\t"
+ "movq (%5), %%mm3\n\t" /* mm3 = halfone */
"add $8, %0\n\t" /* increment offset */
/* red */
- "movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
- "movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
+ "movq (%1), %%mm4\n\t" /* mat[0] | mat[1] */
+ "movq 8(%1), %%mm5\n\t" /* mat[2] | mat[3] */
"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] |
G*[1] + G2*[3] */
"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] +
G*[1] + G2*[3] | ? */
"pfmin %%mm6, %%mm5\n\t"
"pfmax %%mm7, %%mm5\n\t"
- "pf2id %%mm5, %%mm5\n\t" /* to integer */
- "movd %%mm5, %1\n\t" /* write r */
+ "movd %%mm5, (%3)\n\t" /* write to tmp[0] */
/* green */
- "movq 16(%4), %%mm4\n\t"
- "movq 24(%4), %%mm5\n\t"
+ "movq 16(%1), %%mm4\n\t"
+ "movq 24(%1), %%mm5\n\t"
+ "pfmul %%mm0, %%mm4\n\t"
+ "pfmul %%mm1, %%mm5\n\t"
+ "pfadd %%mm4, %%mm5\n\t"
+ "pfacc %%mm5, %%mm5\n\t"
+ "pfmin %%mm6, %%mm5\n\t"
+ "pfmax %%mm7, %%mm5\n\t"
+ "movd %%mm5, 4(%3)\n\t" /* write to tmp[1] */
+
+ /* blue */
+ "movq 32(%1), %%mm4\n\t"
+ "movq 40(%1), %%mm5\n\t"
+ "pfmul %%mm0, %%mm4\n\t"
+ "pfmul %%mm1, %%mm5\n\t"
+ "pfadd %%mm4, %%mm5\n\t"
+ "pfacc %%mm5, %%mm5\n\t"
+ "pfmin %%mm6, %%mm5\n\t"
+ "pfmax %%mm7, %%mm5\n\t"
+ "movd %%mm5, 8(%3)\n\t" /* write to tmp[2] */
+
+#ifdef USE_3DNOW_LUMINANCE_CURVE
+ /* Luminance */
+ "movq 48(%1), %%mm4\n\t"
+ "movq 56(%1), %%mm5\n\t"
"pfmul %%mm0, %%mm4\n\t"
"pfmul %%mm1, %%mm5\n\t"
"pfadd %%mm4, %%mm5\n\t"
@@ -854,25 +1021,51 @@ COLOR_TRANSFORM(transform_cms8_3dnow)
"pfmin %%mm6, %%mm5\n\t"
"pfmax %%mm7, %%mm5\n\t"
"pf2id %%mm5, %%mm5\n\t"
- "movd %%mm5, %2\n\t"
-
- /* blue */
- "movq 32(%4), %%mm4\n\t"
- "movq 40(%4), %%mm5\n\t"
- "pfmul %%mm0, %%mm4\n\t"
- "pfmul %%mm1, %%mm5\n\t"
- "pfadd %%mm4, %%mm5\n\t"
- "pfacc %%mm5, %%mm5\n\t"
- "pfmin %%mm6, %%mm5\n\t"
- "pfmax %%mm7, %%mm5\n\t"
- "pf2id %%mm5, %%mm5\n\t"
- "movd %%mm5, %3\n\t"
- : "+r" (s), "+r" (r), "+r" (g), "+r" (b)
- : "r" (&mat)
- );
- buffer[destoffset++] = rct->priv->table16[r];
- buffer[destoffset++] = rct->priv->table16[g];
- buffer[destoffset++] = rct->priv->table16[b];
+ "pfadd %%mm3, %%mm5\n\t" /* +0.5 to have real
round */
+ "movd %%mm5, %%eax\n\t"
+
+ /* Compute luminance corrected values */
+ "movq (%3), %%mm0\n\t" /* R, G */
+ "movq 8(%3), %%mm1\n\t" /* B, 0 */
+ "movd (%2, %%"REG_a", 4), %%mm2\n\t" /* mm2 = x
| a */
+ "pshufw $0x44, %%mm2, %%mm2\n\t" /* mm2 = a | a
*/
+ "pfmul %%mm2, %%mm0\n\t" /* mm0 = aR | aG */
+ "pfmul %%mm2, %%mm1\n\t" /* mm1 = aB | 0 */
+ "pfmin %%mm6, %%mm0\n\t" /* mm0 = min(mm0,
65535.f) */
+ "pfmin %%mm6, %%mm1\n\t" /* mm1 = min(mm1,
65535.f) */
+ "pfmax %%mm7, %%mm0\n\t" /* mm0 = max(mm0, 0) */
+ "pfmax %%mm7, %%mm1\n\t" /* mm1 = max(mm1, 0) */
+ "pfadd %%mm3, %%mm0\n\t" /* +0.5 */
+ "pfadd %%mm3, %%mm1\n\t" /* +0.5 */
+ "pf2id %%mm0, %%mm0\n\t" /* mm0 = (int)aR |
(int)aG */
+ "pf2id %%mm1, %%mm1\n\t" /* mm1 = (int)aB | 0 */
+ "movq %%mm0, (%4)\n\t" /* write new R/G to rgbx
*/
+ "movq %%mm1, 8(%4)\n\t" /* write new B/0 to
rgbx */
+#else
+ "movq (%3), %%mm0\n\t"
+ "movq 8(%3), %%mm1\n\t"
+ "pfmin %%mm6, %%mm0\n\t"
+ "pfmin %%mm6, %%mm1\n\t"
+ "pfmax %%mm7, %%mm0\n\t"
+ "pfmax %%mm7, %%mm1\n\t"
+ "pfadd %%mm3, %%mm0\n\t"
+ "pfadd %%mm3, %%mm1\n\t"
+ "pf2id %%mm0, %%mm0\n\t"
+ "pf2id %%mm1, %%mm1\n\t"
+ "movq %%mm0, (%4)\n\t"
+ "movq %%mm1, 8(%4)\n\t"
+#endif
+ : "+r" (s)
+ : "r" (&mat),
+ "r" (rct->priv->luminance_float),
+ "r" (tmp),
+ "r" (rgbx),
+ "r" (halfone)
+ : "%"REG_a, "memory");
+
+ buffer[destoffset++] = rct->priv->table16[rgbx[0]];
+ buffer[destoffset++] = rct->priv->table16[rgbx[1]];
+ buffer[destoffset++] = rct->priv->table16[rgbx[2]];
}
cmsDoTransform((cmsHPROFILE) rct->priv->transform, buffer,
out+height * out_rowstride, width);
}
@@ -901,6 +1094,8 @@ COLOR_TRANSFORM(transform_cms_c)
srcoffset = y * in_rowstride;
for(x=0 ; x<width ; x++)
{
+ guint Y;
+
rr = (in[srcoffset+R]*pre_muli[R])>>7;
gg = (in[srcoffset+G]*pre_muli[G])>>7;
bb = (in[srcoffset+B]*pre_muli[B])>>7;
@@ -915,6 +1110,23 @@ COLOR_TRANSFORM(transform_cms_c)
+ gg*mati.coeff[2][1]
+ bb*mati.coeff[2][2])>>MATRIX_RESOLUTION;
_CLAMP65535_TRIPLET(r,g,b);
+
+ // Compute luminance
+ Y = ( LUM_FIXED_XYZ_R*r
+ + LUM_FIXED_XYZ_G*g
+ + LUM_FIXED_XYZ_B*b
+ + LUM_FIXED_HALFONE)>>LUM_PRECISION;
+
+ // Find the factor to apply to the RGB triplet
+ Y = rct->priv->luminance_fixed[Y];
+
+ // Multiply the RGB triplet using fixed point arithmetic
+ r = (r*Y)>>LUM_PRECISION;
+ g = (g*Y)>>LUM_PRECISION;
+ b = (b*Y)>>LUM_PRECISION;
+
+ _CLAMP65535_TRIPLET(r,g,b);
+
buffer[destoffset++] = rct->priv->table16[r];
buffer[destoffset++] = rct->priv->table16[g];
buffer[destoffset++] = rct->priv->table16[b];
@@ -945,6 +1157,8 @@ COLOR_TRANSFORM(transform_nocms_c)
srcoffset = y * in_rowstride;
for(x=0 ; x<width ; x++)
{
+ guint Y;
+
rr = (in[srcoffset+R]*pre_muli[R]+64)>>7;
gg = (in[srcoffset+G]*pre_muli[G]+64)>>7;
bb = (in[srcoffset+B]*pre_muli[B]+64)>>7;
@@ -959,6 +1173,23 @@ COLOR_TRANSFORM(transform_nocms_c)
+ gg*mati.coeff[2][1]
+ bb*mati.coeff[2][2])>>MATRIX_RESOLUTION;
_CLAMP65535_TRIPLET(r,g,b);
+
+ // Compute luminance
+ Y = ( LUM_FIXED_XYZ_R*r
+ + LUM_FIXED_XYZ_G*g
+ + LUM_FIXED_XYZ_B*b
+ + LUM_FIXED_HALFONE)>>LUM_PRECISION;
+
+ // Find the factor to apply to the RGB triplet
+ Y = rct->priv->luminance_fixed[Y];
+
+ // Multiply the RGB triplet using fixed point arithmetic
+ r = (r*Y)>>LUM_PRECISION;
+ g = (g*Y)>>LUM_PRECISION;
+ b = (b*Y)>>LUM_PRECISION;
+
+ _CLAMP65535_TRIPLET(r,g,b);
+
d[destoffset++] = rct->priv->table8[r];
d[destoffset++] = rct->priv->table8[g];
d[destoffset++] = rct->priv->table8[b];
@@ -1005,6 +1236,8 @@ rs_color_transform_make_histogram(RS_COL
srcoffset = y * input->rowstride;
for(x=0 ; x<input->w ; x++)
{
+ guint Y;
+
rr = (in[srcoffset+R]*pre_muli[R])>>7;
gg = (in[srcoffset+G]*pre_muli[G])>>7;
bb = (in[srcoffset+B]*pre_muli[B])>>7;
@@ -1020,6 +1253,22 @@ rs_color_transform_make_histogram(RS_COL
+ bb*mati.coeff[2][2])>>MATRIX_RESOLUTION;
_CLAMP65535_TRIPLET(r,g,b);
+ // Compute luminance
+ Y = ( LUM_FIXED_XYZ_R*r
+ + LUM_FIXED_XYZ_G*g
+ + LUM_FIXED_XYZ_B*b
+ + LUM_FIXED_HALFONE)>>LUM_PRECISION;
+
+ // Find the factor to apply to the RGB triplet
+ Y = rct->priv->luminance_fixed[Y];
+
+ // Multiply the RGB triplet using fixed point arithmetic
+ r = (r*Y)>>LUM_PRECISION;
+ g = (g*Y)>>LUM_PRECISION;
+ b = (b*Y)>>LUM_PRECISION;
+
+ _CLAMP65535_TRIPLET(r,g,b);
+
if (rct->priv->transform != NULL)
{
buffer16[x*3+R] = r;
_______________________________________________
Rawstudio-dev mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-dev