Author: post
Date: 2012-02-13 17:31:58 +0100 (Mon, 13 Feb 2012)
New Revision: 4132
Modified:
trunk/plugins/dcp/dcp-sse4.c
Log:
Use PEXTRD for getting out curve lookup values, and be sure to set correct
rounding mode.
Modified: trunk/plugins/dcp/dcp-sse4.c
===================================================================
--- trunk/plugins/dcp/dcp-sse4.c 2012-02-13 16:00:43 UTC (rev 4131)
+++ trunk/plugins/dcp/dcp-sse4.c 2012-02-13 16:31:58 UTC (rev 4132)
@@ -348,6 +348,8 @@
__m128 r, g, b, r2, g2, b2;
__m128i zero;
+ int _mm_rounding = _MM_GET_ROUNDING_MODE();
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
gboolean do_contrast = (dcp->contrast > 1.001f);
gboolean do_highrec = (dcp->contrast < 0.999f);
__m128 hue_add = _mm_set_ps(dcp->hue, dcp->hue, dcp->hue, dcp->hue);
@@ -362,7 +364,6 @@
SETFLOAT4_SAME(_recover_radius, 1.0 - __recover_radius);
SETFLOAT4_SAME(_contr_base, 0.5f);
SETFLOAT4_SAME(_inv_contrast, 1.0f - dcp->contrast);
- int xfer[4] __attribute__ ((aligned (16)));
SETFLOAT4(_min_cam, 0.0f, dcp->camera_white.z, dcp->camera_white.y,
dcp->camera_white.x);
SETFLOAT4_SAME(_black_minus_radius, dcp->exposure_black -
dcp->exposure_radius);
@@ -556,18 +557,17 @@
{
/* Convert v to lookup values and interpolate */
__m128 v_mul = _mm_mul_ps(v,
_mm_load_ps(_twofiftysix_ps));
- __m128i lookup = _mm_cvtps_epi32(v_mul);
- _mm_store_si128((__m128i*)&xfer[0], lookup);
+ __m128i lookup =
_mm_slli_epi32(_mm_cvtps_epi32(v_mul),1);
/* Calculate fractions */
__m128 frac = _mm_sub_ps(v_mul,
_mm_floor_ps(v_mul));
__m128 inv_frac =
_mm_sub_ps(_mm_load_ps(_ones_ps), frac);
/* Load two adjacent curve values and
interpolate between them */
- __m128 p0p1 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[0]*2]));
- __m128 p2p3 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[2]*2]));
- p0p1 = _mm_loadh_pi(p0p1,
(__m64*)&dcp->curve_samples[xfer[1]*2]);
- p2p3 = _mm_loadh_pi(p2p3,
(__m64*)&dcp->curve_samples[xfer[3]*2]);
+ __m128 p0p1 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[_mm_extract_epi32(lookup,0)]));
+ __m128 p2p3 =
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[_mm_extract_epi32(lookup,2)]));
+ p0p1 = _mm_loadh_pi(p0p1,
(__m64*)&dcp->curve_samples[_mm_extract_epi32(lookup,1)]);
+ p2p3 = _mm_loadh_pi(p2p3,
(__m64*)&dcp->curve_samples[_mm_extract_epi32(lookup,3)]);
/* Pack all lower values in v0, high in v1 and
interpolate */
__m128 v0 = _mm_shuffle_ps(p0p1, p2p3,
_MM_SHUFFLE(2,0,2,0));
@@ -627,6 +627,7 @@
_mm_store_si128(pixel + 1, p2);
}
}
+ _MM_SET_ROUNDING_MODE(_mm_rounding);
return TRUE;
}
#undef DW
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit