Author: post
Date: 2012-01-31 18:36:27 +0100 (Tue, 31 Jan 2012)
New Revision: 4118

Modified:
   trunk/plugins/lensfun/lensfun-avx.c
   trunk/plugins/lensfun/lensfun-sse4.c
Log:
Use pextrd in the Lensfun SSE4/AVX code to transfer offsets and weights
directly out of the SSE registers instead of storing them to memory first.

Modified: trunk/plugins/lensfun/lensfun-avx.c
===================================================================
--- trunk/plugins/lensfun/lensfun-avx.c 2012-01-31 17:16:46 UTC (rev 4117)
+++ trunk/plugins/lensfun/lensfun-avx.c 2012-01-31 17:36:27 UTC (rev 4118)
@@ -255,8 +255,6 @@
        nx = _mm_min_epi32(nx, _m_w);
        ny = _mm_min_epi32(ny, _m_h);
 
-       int xfer[16] __attribute__ ((aligned (16)));
-
        /* Pitch as pixels */
        __m128i pitch = _mm_set1_epi32(in->rowstride >> 2);
 
@@ -281,29 +279,25 @@
        c_offset = _mm_add_epi32(zero12, _mm_slli_epi32(c_offset, 2));
        d_offset = _mm_add_epi32(zero12, _mm_slli_epi32(d_offset, 2));
 
-       _mm_store_si128((__m128i*)xfer, a_offset);
-       _mm_store_si128((__m128i*)&xfer[4], b_offset);
-       _mm_store_si128((__m128i*)&xfer[8], c_offset);
-       _mm_store_si128((__m128i*)&xfer[12], d_offset);
-       
+#define GETW(a,b) _mm_extract_epi32(a,b)
        gushort* pixels[12];
        
        /* Loop unrolled, allows agressive instruction reordering */
        /* Red, then G & B */
-       pixels[0] = in->pixels + xfer[0];       // a
-       pixels[1] = in->pixels + xfer[4];       // b
-       pixels[2] = in->pixels + xfer[8];       // c
-       pixels[3] = in->pixels + xfer[12];      // d
+       pixels[0] = in->pixels + GETW(a_offset,0);
+       pixels[1] = in->pixels + GETW(b_offset,0);
+       pixels[2] = in->pixels + GETW(c_offset,0);
+       pixels[3] = in->pixels + GETW(d_offset,0);
                
-       pixels[4] = in->pixels + xfer[1+0];             // a
-       pixels[5] = in->pixels + xfer[1+4];             // b
-       pixels[6] = in->pixels + xfer[1+8];             // c
-       pixels[7] = in->pixels + xfer[1+12];    // d
+       pixels[4] = in->pixels + GETW(a_offset,1);
+       pixels[5] = in->pixels + GETW(b_offset,1);
+       pixels[6] = in->pixels + GETW(c_offset,1);
+       pixels[7] = in->pixels + GETW(d_offset,1);
 
-       pixels[8] = in->pixels + xfer[2+0];             // a
-       pixels[9] = in->pixels + xfer[2+4];             // b
-       pixels[10] = in->pixels + xfer[2+8];    // c
-       pixels[11] = in->pixels + xfer[2+12];   // d
+       pixels[8] = in->pixels + GETW(a_offset,2);
+       pixels[9] = in->pixels + GETW(b_offset,2);
+       pixels[10] = in->pixels + GETW(c_offset,2);
+       pixels[11] = in->pixels + GETW(d_offset,2);
 
        /* Calculate distances */
        __m128i twofiftyfive = _mm_set1_epi32(255);
@@ -318,18 +312,14 @@
        __m128i cw = _mm_srai_epi32(_mm_mullo_epi16(inv_diffx, diffy),1);
        __m128i dw = _mm_srai_epi32(_mm_mullo_epi16(diffx, diffy),1);
 
-       _mm_store_si128((__m128i*)xfer, aw);
-       _mm_store_si128((__m128i*)&xfer[4], bw);
-       _mm_store_si128((__m128i*)&xfer[8], cw);
-       _mm_store_si128((__m128i*)&xfer[12], dw);
-       
        gushort** p = pixels;
        /* Loop unrolled */
-       out[0]  = (gushort) ((xfer[0] * *p[0] + xfer[4] * *p[1] + xfer[8] * 
*p[2] + xfer[12] * *p[3]  + 16384) >> 15 );
+       out[0]  = (gushort) ((GETW(aw,0) * *p[0] + GETW(bw,0) * *p[1] + 
GETW(cw,0) * *p[2] + GETW(dw,0) * *p[3]  + 16384) >> 15 );
        p+=4;
-       out[1]  = (gushort) ((xfer[1] * *p[0] + xfer[1+4] * *p[1] + xfer[1+8] * 
*p[2] + xfer[1+12] * *p[3]  + 16384) >> 15 );
+       out[1]  = (gushort) ((GETW(aw,1) * *p[0] + GETW(bw,1) * *p[1] + 
GETW(cw,1) * *p[2] + GETW(dw,1) * *p[3]  + 16384) >> 15 );
        p+=4;
-       out[2]  = (gushort) ((xfer[2] * *p[0] + xfer[2+4] * *p[1] + xfer[2+8] * 
*p[2] + xfer[2+12] * *p[3]  + 16384) >> 15 );
+       out[2]  = (gushort) ((GETW(aw,2) * *p[0] + GETW(bw,2) * *p[1] + 
GETW(cw,2) * *p[2] + GETW(dw,2) * *p[3]  + 16384) >> 15 );
+#undef GETW
 }
 
 #else // NO AVX

Modified: trunk/plugins/lensfun/lensfun-sse4.c
===================================================================
--- trunk/plugins/lensfun/lensfun-sse4.c        2012-01-31 17:16:46 UTC (rev 4117)
+++ trunk/plugins/lensfun/lensfun-sse4.c        2012-01-31 17:36:27 UTC (rev 4118)
@@ -84,8 +84,6 @@
        nx = _mm_min_epi32(nx, _m_w);
        ny = _mm_min_epi32(ny, _m_h);
 
-       int xfer[16] __attribute__ ((aligned (16)));
-
        /* Pitch as pixels */
        __m128i pitch = _mm_set1_epi32(in->rowstride >> 2);
 
@@ -110,29 +108,25 @@
        c_offset = _mm_add_epi32(zero12, _mm_slli_epi32(c_offset, 2));
        d_offset = _mm_add_epi32(zero12, _mm_slli_epi32(d_offset, 2));
 
-       _mm_store_si128((__m128i*)xfer, a_offset);
-       _mm_store_si128((__m128i*)&xfer[4], b_offset);
-       _mm_store_si128((__m128i*)&xfer[8], c_offset);
-       _mm_store_si128((__m128i*)&xfer[12], d_offset);
-       
+#define GETW(a,b) _mm_extract_epi32(a,b)
        gushort* pixels[12];
        
        /* Loop unrolled, allows agressive instruction reordering */
        /* Red, then G & B */
-       pixels[0] = in->pixels + xfer[0];       // a
-       pixels[1] = in->pixels + xfer[4];       // b
-       pixels[2] = in->pixels + xfer[8];       // c
-       pixels[3] = in->pixels + xfer[12];      // d
+       pixels[0] = in->pixels + GETW(a_offset,0);
+       pixels[1] = in->pixels + GETW(b_offset,0);
+       pixels[2] = in->pixels + GETW(c_offset,0);
+       pixels[3] = in->pixels + GETW(d_offset,0);
                
-       pixels[4] = in->pixels + xfer[1+0];             // a
-       pixels[5] = in->pixels + xfer[1+4];             // b
-       pixels[6] = in->pixels + xfer[1+8];             // c
-       pixels[7] = in->pixels + xfer[1+12];    // d
+       pixels[4] = in->pixels + GETW(a_offset,1);
+       pixels[5] = in->pixels + GETW(b_offset,1);
+       pixels[6] = in->pixels + GETW(c_offset,1);
+       pixels[7] = in->pixels + GETW(d_offset,1);
 
-       pixels[8] = in->pixels + xfer[2+0];             // a
-       pixels[9] = in->pixels + xfer[2+4];             // b
-       pixels[10] = in->pixels + xfer[2+8];    // c
-       pixels[11] = in->pixels + xfer[2+12];   // d
+       pixels[8] = in->pixels + GETW(a_offset,2);
+       pixels[9] = in->pixels + GETW(b_offset,2);
+       pixels[10] = in->pixels + GETW(c_offset,2);
+       pixels[11] = in->pixels + GETW(d_offset,2);
 
        /* Calculate distances */
        __m128i twofiftyfive = _mm_set1_epi32(255);
@@ -147,18 +141,14 @@
        __m128i cw = _mm_srai_epi32(_mm_mullo_epi16(inv_diffx, diffy),1);
        __m128i dw = _mm_srai_epi32(_mm_mullo_epi16(diffx, diffy),1);
 
-       _mm_store_si128((__m128i*)xfer, aw);
-       _mm_store_si128((__m128i*)&xfer[4], bw);
-       _mm_store_si128((__m128i*)&xfer[8], cw);
-       _mm_store_si128((__m128i*)&xfer[12], dw);
-       
        gushort** p = pixels;
        /* Loop unrolled */
-       out[0]  = (gushort) ((xfer[0] * *p[0] + xfer[4] * *p[1] + xfer[8] * 
*p[2] + xfer[12] * *p[3]  + 16384) >> 15 );
+       out[0]  = (gushort) ((GETW(aw,0) * *p[0] + GETW(bw,0) * *p[1] + 
GETW(cw,0) * *p[2] + GETW(dw,0) * *p[3]  + 16384) >> 15 );
        p+=4;
-       out[1]  = (gushort) ((xfer[1] * *p[0] + xfer[1+4] * *p[1] + xfer[1+8] * 
*p[2] + xfer[1+12] * *p[3]  + 16384) >> 15 );
+       out[1]  = (gushort) ((GETW(aw,1) * *p[0] + GETW(bw,1) * *p[1] + 
GETW(cw,1) * *p[2] + GETW(dw,1) * *p[3]  + 16384) >> 15 );
        p+=4;
-       out[2]  = (gushort) ((xfer[2] * *p[0] + xfer[2+4] * *p[1] + xfer[2+8] * 
*p[2] + xfer[2+12] * *p[3]  + 16384) >> 15 );
+       out[2]  = (gushort) ((GETW(aw,2) * *p[0] + GETW(bw,2) * *p[1] + 
GETW(cw,2) * *p[2] + GETW(dw,2) * *p[3]  + 16384) >> 15 );
+#undef GETW
 }
 
 #else // NO SSE4


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to