Author: post
Date: 2010-12-29 16:14:57 +0100 (Wed, 29 Dec 2010)
New Revision: 3710
Modified:
trunk/plugins/lensfun/lensfun-sse2.c
trunk/plugins/lensfun/lensfun.c
Log:
Disable the proposed crop if only a partial image is requested from Lensfun,
and do less processing in that case.
Modified: trunk/plugins/lensfun/lensfun-sse2.c
===================================================================
--- trunk/plugins/lensfun/lensfun-sse2.c 2010-12-29 03:26:08 UTC (rev 3709)
+++ trunk/plugins/lensfun/lensfun-sse2.c 2010-12-29 15:14:57 UTC (rev 3710)
@@ -204,7 +204,154 @@
p+=4;
out[2] = (gushort) ((xfer[2] * *p[0] + xfer[2+4] * *p[1] + xfer[2+8] *
*p[2] + xfer[2+12] * *p[3] + 16384) >> 15 );
}
-#else
+
+void
+rs_image16_bilinear_nomeasure_sse2(RS_IMAGE16 *in, gushort *out, gfloat *pos)
+{
+ const gint m_w = (in->w-1);
+ const gint m_h = (in->h-1);
+
+ __m128 p0, p1;
+ if ((uintptr_t)pos & 15)
+ {
+ p0 = _mm_loadu_ps(pos); // y1x1 y0x0
+ p1 = _mm_loadu_ps(pos+4); // ---- y2x2
+ } else
+ {
+ p0 = _mm_load_ps(pos); // y1x1 y0x0
+ p1 = _mm_load_ps(pos+4); // ---- y2x2
+ }
+
+ // to x2x2 x1x0
+ __m128 xf = _mm_shuffle_ps(p0, p1, _MM_SHUFFLE(0,0,2,0));
+ // to y2y2 y1y0
+ __m128 yf = _mm_shuffle_ps(p0, p1, _MM_SHUFFLE(1,1,3,1));
+
+ __m128 fl256 = _mm_load_ps(twofiftytwo_ps);
+ xf = _mm_mul_ps(xf, fl256);
+ yf = _mm_mul_ps(yf, fl256);
+ __m128i x = _mm_cvttps_epi32(xf);
+ __m128i y = _mm_cvttps_epi32(yf);
+
+ __m128i _m_w = _mm_slli_epi32(_mm_set1_epi32(m_w), 8);
+ __m128i _m_h = _mm_slli_epi32(_mm_set1_epi32(m_h), 8);
+
+ __m128i x_gt, y_gt;
+
+ /* Clamping */
+ x_gt = _mm_cmpgt_epi32(x, _m_w);
+ y_gt = _mm_cmpgt_epi32(y, _m_h);
+
+ x = _mm_or_si128(_mm_andnot_si128(x_gt, x), _mm_and_si128(_m_w, x_gt));
+ y = _mm_or_si128(_mm_andnot_si128(y_gt, y), _mm_and_si128(_m_h, y_gt));
+
+ __m128i zero = _mm_setzero_si128();
+ __m128i x_lt = _mm_cmplt_epi32(x, zero);
+ __m128i y_lt = _mm_cmplt_epi32(y, zero);
+ x = _mm_andnot_si128(x_lt, x);
+ y = _mm_andnot_si128(y_lt, y);
+
+ __m128i one = _mm_set1_epi32(1);
+ __m128i nx = _mm_add_epi32(one, _mm_srai_epi32(x, 8));
+ __m128i ny = _mm_add_epi32(one, _mm_srai_epi32(y, 8));
+
+ /* Check that 'next' pixels are in bounds */
+ _m_w = _mm_srai_epi32(_m_w, 8);
+ _m_h = _mm_srai_epi32(_m_h, 8);
+
+ x_gt = _mm_cmpgt_epi32(nx, _m_w);
+ y_gt = _mm_cmpgt_epi32(ny, _m_h);
+
+ nx = _mm_or_si128(_mm_andnot_si128(x_gt, nx), _mm_and_si128(_m_w,
x_gt));
+ ny = _mm_or_si128(_mm_andnot_si128(y_gt, ny), _mm_and_si128(_m_h,
y_gt));
+
+ int xfer[16] __attribute__ ((aligned (16)));
+
+ /* Pitch as pixels */
+ __m128i pitch = _mm_set1_epi32(in->rowstride >> 2 | ((in->rowstride >>
2)<<16));
+
+ /* Remove remainder */
+ __m128i tx = _mm_srai_epi32(x, 8);
+ __m128i ty = _mm_srai_epi32(y, 8);
+
+ /* Multiply y by pitch */
+ ty = _mm_packs_epi32(ty, ty);
+ __m128i ty_lo = _mm_mullo_epi16(ty, pitch);
+ __m128i ty_hi = _mm_mulhi_epi16(ty, pitch);
+ ty = _mm_unpacklo_epi16(ty_lo, ty_hi);
+
+ /* Same to next pixel */
+ ny = _mm_packs_epi32(ny, ny);
+ __m128i ny_lo = _mm_mullo_epi16(ny, pitch);
+ __m128i ny_hi = _mm_mulhi_epi16(ny, pitch);
+ ny = _mm_unpacklo_epi16(ny_lo, ny_hi);
+
+ /* Add pitch and x offset */
+ __m128i a_offset = _mm_add_epi32(tx, ty);
+ __m128i b_offset = _mm_add_epi32(nx, ty);
+ __m128i c_offset = _mm_add_epi32(tx, ny);
+ __m128i d_offset = _mm_add_epi32(nx, ny);
+
+ /* Multiply by pixelsize and add RGB offsets */
+ __m128i zero12 = _mm_load_si128((__m128i*)_zero12);
+ a_offset = _mm_add_epi32(zero12, _mm_slli_epi32(a_offset, 2));
+ b_offset = _mm_add_epi32(zero12, _mm_slli_epi32(b_offset, 2));
+ c_offset = _mm_add_epi32(zero12, _mm_slli_epi32(c_offset, 2));
+ d_offset = _mm_add_epi32(zero12, _mm_slli_epi32(d_offset, 2));
+
+ _mm_store_si128((__m128i*)xfer, a_offset);
+ _mm_store_si128((__m128i*)&xfer[4], b_offset);
+ _mm_store_si128((__m128i*)&xfer[8], c_offset);
+ _mm_store_si128((__m128i*)&xfer[12], d_offset);
+
+ gushort* pixels[12];
+
+ /* Loop unrolled, allows agressive instruction reordering */
+ /* Red, then G & B */
+ pixels[0] = in->pixels + xfer[0]; // a
+ pixels[1] = in->pixels + xfer[4]; // b
+ pixels[2] = in->pixels + xfer[8]; // c
+ pixels[3] = in->pixels + xfer[12]; // d
+
+ pixels[4] = in->pixels + xfer[1+0]; // a
+ pixels[5] = in->pixels + xfer[1+4]; // b
+ pixels[6] = in->pixels + xfer[1+8]; // c
+ pixels[7] = in->pixels + xfer[1+12]; // d
+
+ pixels[8] = in->pixels + xfer[2+0]; // a
+ pixels[9] = in->pixels + xfer[2+4]; // b
+ pixels[10] = in->pixels + xfer[2+8]; // c
+ pixels[11] = in->pixels + xfer[2+12]; // d
+
+ /* Calculate distances */
+ __m128i twofiftyfive = _mm_set1_epi32(255);
+ __m128i diffx = _mm_and_si128(x, twofiftyfive);
+ __m128i diffy = _mm_and_si128(y, twofiftyfive);
+ __m128i inv_diffx = _mm_andnot_si128(diffx, twofiftyfive);
+ __m128i inv_diffy = _mm_andnot_si128(diffy, twofiftyfive);
+
+ /* Calculate weights */
+ __m128i aw = _mm_srai_epi32(_mm_mullo_epi16(inv_diffx, inv_diffy),1);
+ __m128i bw = _mm_srai_epi32(_mm_mullo_epi16(diffx, inv_diffy),1);
+ __m128i cw = _mm_srai_epi32(_mm_mullo_epi16(inv_diffx, diffy),1);
+ __m128i dw = _mm_srai_epi32(_mm_mullo_epi16(diffx, diffy),1);
+
+ _mm_store_si128((__m128i*)xfer, aw);
+ _mm_store_si128((__m128i*)&xfer[4], bw);
+ _mm_store_si128((__m128i*)&xfer[8], cw);
+ _mm_store_si128((__m128i*)&xfer[12], dw);
+
+ gushort** p = pixels;
+ /* Loop unrolled */
+ out[0] = (gushort) ((xfer[0] * *p[0] + xfer[4] * *p[1] + xfer[8] *
*p[2] + xfer[12] * *p[3] + 16384) >> 15 );
+ p+=4;
+ out[1] = (gushort) ((xfer[1] * *p[0] + xfer[1+4] * *p[1] + xfer[1+8] *
*p[2] + xfer[1+12] * *p[3] + 16384) >> 15 );
+ p+=4;
+ out[2] = (gushort) ((xfer[2] * *p[0] + xfer[2+4] * *p[1] + xfer[2+8] *
*p[2] + xfer[2+12] * *p[3] + 16384) >> 15 );
+}
+
+#else // NO SSE2
+
gboolean is_sse2_compiled()
{
return FALSE;
@@ -214,4 +361,10 @@
rs_image16_bilinear_full_sse2(RS_IMAGE16 *in, gushort *out, gfloat *pos,const
gint *current_xy, const gint* min_max_xy)
{
}
+
+void
+rs_image16_bilinear_nomeasure_sse2(RS_IMAGE16 *in, gushort *out, gfloat *pos)
+{
+}
+
#endif // defined (__SSE2__)
Modified: trunk/plugins/lensfun/lensfun.c
===================================================================
--- trunk/plugins/lensfun/lensfun.c 2010-12-29 03:26:08 UTC (rev 3709)
+++ trunk/plugins/lensfun/lensfun.c 2010-12-29 15:14:57 UTC (rev 3710)
@@ -88,6 +88,7 @@
static void inline rs_image16_bilinear_full(RS_IMAGE16 *in, gushort *out,
gfloat *pos);
extern gboolean is_sse2_compiled();
extern void rs_image16_bilinear_full_sse2(RS_IMAGE16 *in, gushort *out, gfloat
*pos, const gint *current_xy, const gint* min_max_xy);
+extern void rs_image16_bilinear_nomeasure_sse2(RS_IMAGE16 *in, gushort *out,
gfloat *pos);
static RSFilterClass *rs_lensfun_parent_class = NULL;
G_MODULE_EXPORT void
@@ -296,6 +297,7 @@
GdkRectangle *roi;
gint stage;
gint min_max_xy[4];
+ gboolean measure_minmax_coords;
} ThreadInfo;
static gpointer
@@ -359,8 +361,15 @@
current_xy[1] = y;
for(x = 0; x < t->roi->width ; x++)
{
- current_xy[0] = x;
- rs_image16_bilinear_full_sse2(t->input,
target, l_pos, current_xy, min_max_xy);
+ if (t->measure_minmax_coords)
+ {
+ current_xy[0] = x;
+
rs_image16_bilinear_full_sse2(t->input, target, l_pos, current_xy, min_max_xy);
+ }
+ else
+ {
+
rs_image16_bilinear_nomeasure_sse2(t->input, target, l_pos);
+ }
target += 4;
l_pos += 6;
}
@@ -370,16 +379,19 @@
{
rs_image16_bilinear_full(t->input,
target, l_pos);
- /* Set minimum and maximum values */
- if (l_pos[0] < 0.0001 || l_pos[2] <
0.0001 || l_pos[4] < 0.0001)
- min_max_xy[0] = x;
- if (l_pos[1] < 0.0001 || l_pos[3] <
0.0001 || l_pos[5] < 0.0001)
- min_max_xy[1] = y;
- if ((l_pos[0] > max_w || l_pos[2] >
max_w || l_pos[4] > max_w) && min_max_xy[2] > 65535)
- min_max_xy[2] = x;
- if ((l_pos[1] > max_h || l_pos[3] >
max_h || l_pos[5] > max_h) && min_max_xy[3] > 65535)
- min_max_xy[3] = y;
-
+ if (t->measure_minmax_coords)
+ {
+ /* Set minimum and maximum
values */
+ if (l_pos[0] < 0.0001 ||
l_pos[2] < 0.0001 || l_pos[4] < 0.0001)
+ min_max_xy[0] = x;
+ if (l_pos[1] < 0.0001 ||
l_pos[3] < 0.0001 || l_pos[5] < 0.0001)
+ min_max_xy[1] = y;
+ if ((l_pos[0] > max_w ||
l_pos[2] > max_w || l_pos[4] > max_w) && min_max_xy[2] > 65535)
+ min_max_xy[2] = x;
+ if ((l_pos[1] > max_h ||
l_pos[3] > max_h || l_pos[5] > max_h) && min_max_xy[3] > 65535)
+ min_max_xy[3] = y;
+ }
+
target += pixelsize;
l_pos += 6;
}
@@ -421,6 +433,7 @@
const gchar *model = NULL;
GdkRectangle *roi, *vign_roi;
RS_RECT *proposed_crop;
+ gboolean measure_minmax_coords = FALSE;
previous_response = rs_filter_get_image(filter->previous, request);
@@ -525,6 +538,7 @@
gboolean destroy_roi = FALSE;
if (!roi)
{
+ measure_minmax_coords = TRUE;
roi = g_new(GdkRectangle, 1);
roi->x = 0;
roi->y = 0;
@@ -532,6 +546,8 @@
roi->height = input->h;
destroy_roi = TRUE;
}
+ else if (roi->x <= 0 && roi->y <= 0 && roi->width >= input->w &&
roi->height >= input->h)
+ measure_minmax_coords = TRUE;
/* Expand ROI by 25% in each direction for vignetting correction */
vign_roi = g_new(GdkRectangle, 1);
@@ -630,6 +646,7 @@
/* Set up job description for individual threads */
for (i = 0; i < threads; i++)
{
+ t[i].measure_minmax_coords =
measure_minmax_coords;
t[i].mod = mod;
t[i].effective_flags = effective_flags;
}
@@ -693,26 +710,30 @@
{
output = rs_image16_copy(input, TRUE);
}
- proposed_crop->x1 = proposed_crop->y1 = 0;
- proposed_crop->x2 = output->w-1;
- proposed_crop->y2 = output->h-1;
- for(i = 0; i < threads; i++)
+
+ if (measure_minmax_coords)
{
- proposed_crop->x1 = MIN(output->w-100,
MAX(proposed_crop->x1, t[i].min_max_xy[0]));
- proposed_crop->y1 = MIN(output->h-100,
MAX(proposed_crop->y1, t[i].min_max_xy[1]));
- proposed_crop->x2 = MAX(proposed_crop->x1+10,
MIN(proposed_crop->x2, t[i].min_max_xy[2]));
- proposed_crop->y2 = MAX(proposed_crop->y1+10,
MIN(proposed_crop->y2, t[i].min_max_xy[3]));
- }
- if (proposed_crop->x1 != 0 || proposed_crop->y1 != 0 ||
proposed_crop->x2 != output->w-1 || proposed_crop->y2 != output->h-1)
- {
-
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-x1",
proposed_crop->x1);
-
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-y1",
proposed_crop->y1);
-
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-x2",
proposed_crop->x2);
-
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-y2",
proposed_crop->y2);
+ proposed_crop->x1 = proposed_crop->y1 = 0;
+ proposed_crop->x2 = output->w-1;
+ proposed_crop->y2 = output->h-1;
+ for(i = 0; i < threads; i++)
+ {
+ proposed_crop->x1 = MIN(output->w-100,
MAX(proposed_crop->x1, t[i].min_max_xy[0]));
+ proposed_crop->y1 = MIN(output->h-100,
MAX(proposed_crop->y1, t[i].min_max_xy[1]));
+ proposed_crop->x2 =
MAX(proposed_crop->x1+10, MIN(proposed_crop->x2, t[i].min_max_xy[2]));
+ proposed_crop->y2 =
MAX(proposed_crop->y1+10, MIN(proposed_crop->y2, t[i].min_max_xy[3]));
+ }
+ if (proposed_crop->x1 != 0 || proposed_crop->y1
!= 0 || proposed_crop->x2 != output->w-1 || proposed_crop->y2 != output->h-1)
+ {
+
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-x1",
proposed_crop->x1);
+
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-y1",
proposed_crop->y1);
+
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-x2",
proposed_crop->x2);
+
rs_filter_param_set_integer(RS_FILTER_PARAM(response), "proposed-crop-y2",
proposed_crop->y2);
// g_debug("x1:%d, y1:%d, x2:%d, y2:%d",
proposed_crop->x1, proposed_crop->y1, proposed_crop->x2, proposed_crop->y2);
+ }
+ else
+ g_free(proposed_crop);
}
- else
- g_free(proposed_crop);
g_free(t);
rs_filter_response_set_image(response, output);
g_object_unref(output);
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit