Author: post
Date: 2009-10-11 17:36:36 +0200 (Sun, 11 Oct 2009)
New Revision: 2698

Modified:
   trunk/plugins/dcp/dcp.c
Log:
Made DCP renderer multithreaded.

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c     2009-10-11 13:52:17 UTC (rev 2697)
+++ trunk/plugins/dcp/dcp.c     2009-10-11 15:36:36 UTC (rev 2698)
@@ -81,6 +81,16 @@
        RSIccProfile *prophoto_profile;
 };
 
+typedef struct {
+       RSDcp *dcp;
+       GThread *threadid;
+       gint start_x;
+       gint start_y;
+       gint end_y;
+       RS_IMAGE16 *tmp;
+
+} ThreadInfo;
+
 RS_DEFINE_FILTER(rs_dcp, RSDcp)
 
 enum {
@@ -97,9 +107,9 @@
 static RS_MATRIX3 find_xyz_to_camera(RSDcp *dcp, const RS_xy_COORD *white_xy, 
RS_MATRIX3 *forward_matrix);
 static void set_white_xy(RSDcp *dcp, const RS_xy_COORD *xy);
 static void precalc(RSDcp *dcp);
-static void render(RSDcp *dcp, RS_IMAGE16 *image);
+static void render(ThreadInfo* t);
 #if defined (__SSE2__)
-static void render_SSE2(RSDcp *dcp, RS_IMAGE16 *image);
+static void render_SSE2(ThreadInfo* t);
 #endif
 static void read_profile(RSDcp *dcp, RSDcpFile *dcp_file);
 static RSIccProfile *get_icc_profile(RSFilter *filter);
@@ -285,6 +295,32 @@
        }
 }
 
+gpointer
+start_single_dcp_thread(gpointer _thread_info)
+{
+       ThreadInfo* t = _thread_info;
+       RS_IMAGE16 *tmp = t->tmp;
+
+#if defined (__SSE2__)
+       if (rs_detect_cpu_features() & RS_CPU_FLAG_SSE2)
+       {
+               render_SSE2(t);
+               if (tmp->w & 3)
+               {
+                       t->start_x = tmp->w - (tmp->w & 3);
+                       render(t);
+               }
+
+       }
+       else
+#endif
+               render(t);
+
+       g_thread_exit(NULL);
+
+       return NULL; /* Make the compiler shut up - we'll never return */
+}
+
 static RSFilterResponse *
 get_image(RSFilter *filter, const RSFilterParam *param)
 {
@@ -317,13 +353,33 @@
        else
                tmp = g_object_ref(output);
 
-#if defined (__SSE2__)
-       if (rs_detect_cpu_features() & RS_CPU_FLAG_SSE2)
-               render_SSE2(dcp, tmp);
-       else
-#endif
-               render(dcp, tmp);
+       guint i, y_offset, y_per_thread, threaded_h;
+       const guint threads = rs_get_number_of_processor_cores();
+       ThreadInfo *t = g_new(ThreadInfo, threads);
 
+       threaded_h = tmp->h;
+       y_per_thread = (threaded_h + threads-1)/threads;
+       y_offset = 0;
+
+       for (i = 0; i < threads; i++)
+       {
+               t[i].tmp = tmp;
+               t[i].start_y = y_offset;
+               t[i].start_x = 0;
+               t[i].dcp = dcp;
+               y_offset += y_per_thread;
+               y_offset = MIN(tmp->h, y_offset);
+               t[i].end_y = y_offset;
+
+               t[i].threadid = g_thread_create(start_single_dcp_thread, &t[i], 
TRUE, NULL);
+       }
+
+       /* Wait for threads to finish */
+       for(i = 0; i < threads; i++)
+               g_thread_join(t[i].threadid);
+
+       g_free(t);
+
        g_object_unref(tmp);
 
        return response;
@@ -1040,8 +1096,10 @@
 }
 
 static void
-render_SSE2(RSDcp *dcp, RS_IMAGE16 *image)
+render_SSE2(ThreadInfo* t)
 {
+       RS_IMAGE16 *image = t->tmp;
+       RSDcp *dcp = t->dcp;
        gint x, y;
        __m128 h, s, v;
        __m128i p1,p2;
@@ -1057,10 +1115,11 @@
        gfloat r_coeffs[3] = {dcp->camera_to_prophoto.coeff[0][0], 
dcp->camera_to_prophoto.coeff[0][1], dcp->camera_to_prophoto.coeff[0][2]};
        gfloat g_coeffs[3] = {dcp->camera_to_prophoto.coeff[1][0], 
dcp->camera_to_prophoto.coeff[1][1], dcp->camera_to_prophoto.coeff[1][2]};
        gfloat b_coeffs[3] = {dcp->camera_to_prophoto.coeff[2][0], 
dcp->camera_to_prophoto.coeff[2][1], dcp->camera_to_prophoto.coeff[2][2]};
+       gint end_x = image->w - (image->w & 3);
 
-       for(y = 0 ; y < image->h; y++)
+       for(y = t->start_y ; y < t->end_y; y++)
        {
-               for(x=0; x < image->w; x+=4)
+               for(x=0; x < end_x; x+=4)
                {
                        __m128i* pixel = (__m128i*)GET_PIXEL(image, x, y);
 
@@ -1289,8 +1348,11 @@
 #endif
 
 static void
-render(RSDcp *dcp, RS_IMAGE16 *image)
+render(ThreadInfo* t)
 {
+       RS_IMAGE16 *image = t->tmp;
+       RSDcp *dcp = t->dcp;
+
        gint x, y;
        gfloat h, s, v;
        gfloat r, g, b;
@@ -1298,9 +1360,9 @@
 
        const gfloat exposure_comp = pow(2.0, dcp->exposure);
 
-       for(y = 0 ; y < image->h; y++)
+       for(y = t->start_y ; y < t->end_y; y++)
        {
-               for(x=0; x < image->w; x++)
+               for(x=t->start_x; x < image->w; x++)
                {
                        gushort *pixel = GET_PIXEL(image, x, y);
 


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to