Author: post
Date: 2009-09-09 23:49:08 +0200 (Wed, 09 Sep 2009)
New Revision: 148
Modified:
RawSpeed/RawImage.cpp
Log:
Add GCC 64-bit SSE2 scaling for black/white.
Disabled Win32 assembler since it doesn't work properly.
Modified: RawSpeed/RawImage.cpp
===================================================================
--- RawSpeed/RawImage.cpp 2009-09-08 18:30:51 UTC (rev 147)
+++ RawSpeed/RawImage.cpp 2009-09-09 21:49:08 UTC (rev 148)
@@ -1,7 +1,7 @@
#include "StdAfx.h"
#include "RawImage.h"
#include "RawDecoder.h" // For exceptions
-/*
+/*
RawSpeed - RAW file decoder.
Copyright (C) 2009 Klaus Post
@@ -23,18 +23,18 @@
http://www.klauspost.com
*/
-RawImageData::RawImageData(void):
-dim(0,0), bpp(0), isCFA(true),
-blackLevel(-1), whitePoint(65536),
+RawImageData::RawImageData(void):
+dim(0,0), bpp(0), isCFA(true),
+blackLevel(-1), whitePoint(65536),
dataRefCount(0), data(0), cpp(1)
{
pthread_mutex_init(&mymutex, NULL);
subsampling.x = subsampling.y = 1;
}
-RawImageData::RawImageData(iPoint2D _dim, guint _bpc, guint _cpp) :
-dim(_dim), bpp(_bpc),
-blackLevel(-1), whitePoint(65536),
+RawImageData::RawImageData(iPoint2D _dim, guint _bpc, guint _cpp) :
+dim(_dim), bpp(_bpc),
+blackLevel(-1), whitePoint(65536),
dataRefCount(0),data(0), cpp(cpp)
{
subsampling.x = subsampling.y = 1;
@@ -139,25 +139,30 @@
scaleValues(f);
}
-#if _MSC_VER > 1399
+#if _MSC_VER > 13990
void RawImageData::scaleValues(float f) {
int info[4];
__cpuid(info,1);
// Check SSE2
- if (f >= 0.0f && info[3]&(1<<26)) {
-
+ if (f >= 0.0f && info[3]&(1<<26)) {
+
__m128i ssescale;
+ __m128i ssesub;
guint gw = pitch / 16;
guint i = (int)(65536.0f*f); // 16 bit fraction
i |= i<<16;
+ guint b = blackLevel | (blackLevel<<16);
+
ssescale = _mm_set_epi32(i,i,i,i);
+ ssesub = _mm_set_epi32(b,b,b,b);
for (int y = 0; y < dim.y; y++) {
__m128i* pixel = (__m128i*)&data[(mOffset.y+y)*pitch];
for (guint x = 0 ; x < gw; x++) {
__m128i pix = _mm_load_si128(pixel);
+ pix = _mm_subs_epu16(pix, ssesub);
pix = _mm_mulhi_epu16(pix, ssescale);
_mm_store_si128(pixel, pix);
pixel++;
@@ -179,14 +184,79 @@
#else
void RawImageData::scaleValues(float f) {
+#if defined (__x86_64__)
+ //TODO: Check for SSE2 on 32 bit systems and use it there
+ guint temp[20];
+
+ guint i = (int)(1024.0f*f); // 10 bit fraction
+ i |= i<<16;
+ guint b = blackLevel | (blackLevel<<16);
+
+ for (int j = 0; j < 4; j++) {
+ temp[j] = b;
+ temp[j+4] = i;
+ temp[j+8] = 512;
+ temp[j+12] = 32768;
+ temp[j+16] = 0x80008000;
+ }
+
+ asm volatile
+ (
+ "movdqu 0(%0), %%xmm7\n" // Subtraction
+ "movdqu 16(%0), %%xmm6\n" // Multiplication factor
+ "movdqu 32(%0), %%xmm5\n" // Fraction
+ "movdqu 48(%0), %%xmm4\n" // Sub 32768
+ "movdqu 64(%0), %%xmm3\n" // Sign shift
+ : // no output registers
+ : "r" (temp)
+ : // %0
+ );
+
+ for (int y = 0; y < dim.y; y++) {
+ guchar* pixel = (guchar*)&data[(mOffset.y+y)*pitch];
+ guint gw = pitch >> 4;
+ for (guint x = 0; x < gw ; x++) {
+ asm volatile (
+ "next_pixel:\n"
+ "movaps 0(%0), %%xmm0\n"
+ "psubusw %%xmm7, %%xmm0\n" // Subtract black
+ "movaps %%xmm0, %%xmm1\n"
+ "pmullw %%xmm6, %%xmm0\n"
+ "pmulhuw %%xmm6, %%xmm1\n"
+ "movaps %%xmm0, %%xmm2\n"
+ "punpcklwd %%xmm1, %%xmm0\n" // First 4 result
+ "punpckhwd %%xmm1, %%xmm2\n" // Last 4 result
+ "paddd %%xmm5, %%xmm0\n" // Add fraction
+ "paddd %%xmm5, %%xmm2\n"
+ "psrad $10, %%xmm0\n"
+ "psrad $10, %%xmm2\n"
+ "psubd %%xmm4, %%xmm0\n" // Avoid saturation
+ "psubd %%xmm4, %%xmm2\n"
+ "packssdw %%xmm2, %%xmm0\n"
+ "pxor %%xmm3, %%xmm0\n" // Shift sign
+ "movaps %%xmm0, 0(%0)\n"
+
+ "add $16, %0\n"
+ : // no output registers
+ : "r" (pixel)
+ : // %0
+ );
+ }
+ }
+
+#else
+
gint gw = dim.x*cpp;
- int scale = (int)(16384.0f*f); // 14 bit fraction
+ int scale = (int)(16384.0f*f); // 14 bit fraction
for (int y = 0; y < dim.y; y++) {
gushort *pixel = (gushort*)getData(0,y);
for (int x = 0 ; x < gw; x++) {
- pixel[x] = clampbits(((pixel[x]-blackLevel)*scale+8192)>>14,16);
+ pixel[x] = clampbits(((pixel[x]-blackLevel)*scale+8192)>>14,16);
}
}
+
+#endif
+
}
#endif
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit