Author: post
Date: 2009-12-30 17:03:31 +0100 (Wed, 30 Dec 2009)
New Revision: 2892
Modified:
branches/rawstudio-ng-color/plugins/dcp/dcp-sse.c
Log:
DCP: Use built-in functions to generate zeroes instead of loading them.
Modified: branches/rawstudio-ng-color/plugins/dcp/dcp-sse.c
===================================================================
--- branches/rawstudio-ng-color/plugins/dcp/dcp-sse.c 2009-12-30 15:22:01 UTC (rev 2891)
+++ branches/rawstudio-ng-color/plugins/dcp/dcp-sse.c 2009-12-30 16:03:31 UTC (rev 2892)
@@ -32,7 +32,6 @@
/* _mm_insert_epi32, since no-one was kind enough to include "insertps xmm, mem32, imm8" */
/* as a valid intrinsic. So we use the integer equivalent instead */
-static gfloat _zero_ps[4] __attribute__ ((aligned (16))) = {0.0f, 0.0f, 0.0f, 0.0f};
static gfloat _ones_ps[4] __attribute__ ((aligned (16))) = {1.0f, 1.0f, 1.0f, 1.0f};
static gfloat _two_ps[4] __attribute__ ((aligned (16))) = {2.0f, 2.0f, 2.0f, 2.0f};
static gfloat _six_ps[4] __attribute__ ((aligned (16))) = {6.0f-1e-15, 6.0f-1e-15, 6.0f-1e-15, 6.0f-1e-15};
@@ -42,7 +41,7 @@
RGBtoHSV_SSE(__m128 *c0, __m128 *c1, __m128 *c2)
{
- __m128 zero_ps = _mm_load_ps(_zero_ps);
+ __m128 zero_ps = _mm_setzero_ps();
__m128 small_ps = _mm_load_ps(_very_small_ps);
__m128 ones_ps = _mm_load_ps(_ones_ps);
@@ -66,7 +65,7 @@
__m128 v_mask = _mm_cmpeq_ps(gap, zero_ps);
v = _mm_add_ps(v, _mm_and_ps(add_v, v_mask));
- h = _mm_xor_ps(r,r);
+ h = _mm_setzero_ps();
/* Set gap to one where sat = 0, this will avoid divisions by zero,
these values will not be used */
ones_ps = _mm_and_ps(ones_ps, v_mask);
@@ -110,6 +109,7 @@
s = _mm_andnot_ps(v_mask, val );
/* Check if h < 0 */
+ zero_ps = _mm_setzero_ps();
__m128 six_ps = _mm_load_ps(_six_ps);
mask = _mm_cmplt_ps(h, zero_ps);
h = _mm_add_ps(h, _mm_and_ps(mask, six_ps));
@@ -355,7 +355,6 @@
next_offsets = _mm_add_epi32(next_offsets, table_offsets);
table_offsets = _mm_add_epi32(table_offsets,
_mm_mullo_epi16(hIndex0, hueStep));
- // TODO: This will result in a store->load forward size mismatch penalty, if possible, avoid.
_mm_store_si128((__m128i*)xfer_0, table_offsets);
_mm_store_si128((__m128i*)xfer_1, next_offsets);
gint _valStep = precalc->valStep[0];
@@ -559,7 +558,7 @@
__m128i p1,p2;
__m128 p1f, p2f, p3f, p4f;
__m128 r, g, b, r2, g2, b2;
- __m128i zero = _mm_load_si128((__m128i*)_15_bit_epi32);
+ __m128i zero;
__m128 hue_add = _mm_set_ps(dcp->hue, dcp->hue, dcp->hue, dcp->hue);
__m128 sat = _mm_set_ps(dcp->saturation, dcp->saturation,
dcp->saturation, dcp->saturation);
@@ -597,7 +596,7 @@
{
__m128i* pixel = (__m128i*)GET_PIXEL(image, x, y);
- zero = _mm_xor_si128(zero,zero);
+ zero = _mm_setzero_si128();
/* Convert to float */
p1 = _mm_load_si128(pixel);
@@ -670,7 +669,7 @@
/* Hue */
__m128 six_ps = _mm_load_ps(_six_ps);
- __m128 zero_ps = _mm_load_ps(_zero_ps);
+ __m128 zero_ps = _mm_setzero_ps();
h = _mm_add_ps(h, hue_add);
/* Check if hue > 6 or < 0*/
@@ -757,7 +756,8 @@
huesat_map_SSE2(dcp->looktable,
&dcp->looktable_precalc, &h, &s, &v);
}
- /* Ensure that hue is within range */
+ /* Ensure that hue is within range */
+ zero_ps = _mm_setzero_ps();
h_mask_gt = _mm_cmpgt_ps(h, six_ps);
h_mask_lt = _mm_cmplt_ps(h, zero_ps);
six_masked_gt = _mm_and_ps(six_ps, h_mask_gt);
@@ -827,7 +827,7 @@
RGBtoHSV_SSE4(__m128 *c0, __m128 *c1, __m128 *c2)
{
- __m128 zero_ps = _mm_load_ps(_zero_ps);
+ __m128 zero_ps = _mm_setzero_ps();
__m128 small_ps = _mm_load_ps(_very_small_ps);
__m128 ones_ps = _mm_load_ps(_ones_ps);
@@ -895,6 +895,7 @@
s = _mm_andnot_ps(v_mask, val );
/* Check if h < 0 */
+ zero_ps = _mm_setzero_ps();
__m128 six_ps = _mm_load_ps(_six_ps);
mask = _mm_cmplt_ps(h, zero_ps);
h = _mm_add_ps(h, _mm_and_ps(mask, six_ps));
@@ -1247,7 +1248,7 @@
__m128i p1,p2;
__m128 p1f, p2f, p3f, p4f;
__m128 r, g, b, r2, g2, b2;
- __m128i zero = _mm_load_si128((__m128i*)_15_bit_epi32);
+ __m128i zero;
__m128 hue_add = _mm_set_ps(dcp->hue, dcp->hue, dcp->hue, dcp->hue);
__m128 sat = _mm_set_ps(dcp->saturation, dcp->saturation,
dcp->saturation, dcp->saturation);
@@ -1285,7 +1286,7 @@
{
__m128i* pixel = (__m128i*)GET_PIXEL(image, x, y);
- zero = _mm_xor_si128(zero,zero);
+ zero = _mm_setzero_si128();
/* Convert to float */
p1 = _mm_load_si128(pixel);
@@ -1358,7 +1359,7 @@
/* Hue */
__m128 six_ps = _mm_load_ps(_six_ps);
- __m128 zero_ps = _mm_load_ps(_zero_ps);
+ __m128 zero_ps = _mm_setzero_ps();
h = _mm_add_ps(h, hue_add);
/* Check if hue > 6 or < 0*/
@@ -1445,6 +1446,7 @@
}
/* Ensure that hue is within range */
+ zero_ps = _mm_setzero_ps();
h_mask_gt = _mm_cmpgt_ps(h, six_ps);
h_mask_lt = _mm_cmplt_ps(h, zero_ps);
six_masked_gt = _mm_and_ps(six_ps, h_mask_gt);
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit