Author: post
Date: 2010-02-04 21:54:52 +0100 (Thu, 04 Feb 2010)
New Revision: 3161
Modified:
trunk/plugins/dcp/dcp-sse2.c
trunk/plugins/dcp/dcp.c
Log:
Reduce the curve table from 65536 to 256 entries and interpolate linearly between adjacent entries instead of doing a direct lookup.
This gives a significant speedup and avoids posterizing the image.
Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c 2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp-sse2.c 2010-02-04 20:54:52 UTC (rev 3161)
@@ -550,6 +550,7 @@
static gfloat _rgb_div_ps[4] __attribute__ ((aligned (16))) = {1.0/65535.0,
1.0/65535.0, 1.0/65535.0, 1.0/65535.0};
static gint _15_bit_epi32[4] __attribute__ ((aligned (16))) = { 32768, 32768,
32768, 32768};
static guint _16_bit_sign[4] __attribute__ ((aligned (16))) =
{0x80008000,0x80008000,0x80008000,0x80008000};
+static gfloat _twofiftysix_ps[4] __attribute__ ((aligned (16))) =
{255.9999f,255.9999f,255.9999f,255.9999f};
#define SETFLOAT4(N, A, B, C, D) float N[4] __attribute__ ((aligned (16))); \
N[0] = D; N[1] = C; N[2] = B; N[3] = A;
@@ -761,20 +762,25 @@
if (!dcp->curve_is_flat)
{
- /* Convert v to lookup values */
- /* TODO: Use 8 bit fraction as interpolation,
for interpolating
- * a more precise lookup using linear
interpolation. Maybe use less than
- * 16 bits for lookup for speed, 10 bits with
interpolation should be enough */
- __m128 v_mul = _mm_load_ps(_16_bit_ps);
- v = _mm_mul_ps(v, v_mul);
- __m128i lookup = _mm_cvtps_epi32(v);
- gfloat* v_p = (gfloat*)&v;
+ /* Convert v to lookup values and interpolate */
+ __m128 v_mul = _mm_mul_ps(v,
_mm_load_ps(_twofiftysix_ps));
+ __m128i lookup = _mm_cvtps_epi32(v_mul);
_mm_store_si128((__m128i*)&xfer[0], lookup);
- v_p[0] = dcp->curve_samples[xfer[0]];
- v_p[1] = dcp->curve_samples[xfer[1]];
- v_p[2] = dcp->curve_samples[xfer[2]];
- v_p[3] = dcp->curve_samples[xfer[3]];
+ /* Calculate fractions */
+ __m128 frac = _mm_sub_ps(v_mul,
_mm_floor_positive_ps(v_mul));
+ __m128 inv_frac =
_mm_sub_ps(_mm_load_ps(_ones_ps), frac);
+
+ /* Load two adjacent curve values and
interpolate between them */
+ __m128 p0p1 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[0]]));
+ __m128 p2p3 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[2]]));
+ p0p1 = _mm_loadh_pi(p0p1,
(__m64*)&dcp->curve_samples[xfer[1]]);
+ p2p3 = _mm_loadh_pi(p2p3,
(__m64*)&dcp->curve_samples[xfer[3]]);
+
+ /* Pack all lower values in v0, high in v1 and
interpolate */
+ __m128 v0 = _mm_shuffle_ps(p0p1, p2p3,
_MM_SHUFFLE(2,0,2,0));
+ __m128 v1 = _mm_shuffle_ps(p0p1, p2p3,
_MM_SHUFFLE(3,1,3,1));
+ v = _mm_add_ps(_mm_mul_ps(inv_frac, v0),
_mm_mul_ps(frac, v1));
}
/* Apply looktable */
Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c 2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp.c 2010-02-04 20:54:52 UTC (rev 3161)
@@ -197,13 +197,13 @@
g_object_unref(spline);
/* Create extra entry */
sampled[65536] = sampled[65535];
- for (i = 0; i < 65536; i++)
+ for (i = 0; i < 256; i++)
{
- gfloat value = (gfloat)i * (1.0
/ 65535.0f);
+ gfloat value = (gfloat)i * (1.0
/ 255.0f);
/* Gamma correct value */
value = powf(value, 1.0f /
2.2f);
- /*Lookup curve corrected value
*/
+ /* Lookup curve corrected value
*/
gfloat lookup = (int)(value *
65535.0f);
gfloat v0 =
sampled[(int)lookup];
gfloat v1 =
sampled[(int)lookup+1];
@@ -216,6 +216,7 @@
/* Store in table */
dcp->curve_samples[i] = value;
}
+ dcp->curve_samples[256] =
dcp->curve_samples[255];
}
}
if (knots)
@@ -224,7 +225,7 @@
else
dcp->curve_is_flat = TRUE;
- for(i=0;i<65536;i++)
+ for(i=0;i<257;i++)
dcp->curve_samples[i] = MIN(1.0f, MAX(0.0f,
dcp->curve_samples[i]));
changed = TRUE;
@@ -267,7 +268,7 @@
{
RSDcpClass *klass = RS_DCP_GET_CLASS(dcp);
- dcp->curve_samples = g_new(gfloat, 65536);
+ dcp->curve_samples = g_new(gfloat, 257);
dcp->huesatmap_interpolated = NULL;
dcp->use_profile = FALSE;
dcp->curve_is_flat = TRUE;
@@ -921,7 +922,13 @@
/* Curve */
if (!dcp->curve_is_flat)
- v = dcp->curve_samples[_S(v)];
+ {
+ gfloat lookup = CLAMP(v * 256.0f, 0.0f,
255.9999f);
+ gfloat v0 = dcp->curve_samples[(gint)lookup];
+ gfloat v1 = dcp->curve_samples[(gint)lookup +
1];
+ lookup -= floorf(lookup);
+ v = v0 * (1.0f - lookup) + v1 * lookup;
+ }
if (dcp->looktable)
huesat_map(dcp->looktable, &h, &s, &v);
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit