cedric pushed a commit to branch master. http://git.enlightenment.org/core/efl.git/commit/?id=6bb4ecd65db4cb674ca3dc6ef30ceb1a22445a90
commit 6bb4ecd65db4cb674ca3dc6ef30ceb1a22445a90 Author: Cedric BAIL <ced...@osg.samsung.com> Date: Fri Feb 20 17:11:44 2015 +0100 evas: use two threads when scaling image. This is for now just a small experiment. It was based on the experiment made with OpenMP. I preferred to only use Eina here as we already have all the infrastructure to do this nicely and simply. As a result I get a 65% speed improvement on average for the involved scaling operation. The secondary CPU on my laptop is running with a load of 75%. I don't have the time right now to do a power consumption analysis, but I think it shouldn't be too bad. I am also not throwing more cores at this as we are not able to use the second core at its max already, so additional cores may result in a bigger energy loss without enough gain. --- src/bin/evas/evas_cserve2_scale.c | 1 + src/lib/evas/common/evas_draw_main.c | 2 + src/lib/evas/common/evas_scale_main.h | 2 + src/lib/evas/common/evas_scale_sample.c | 300 ++++++++++++++++++++++++++++---- 4 files changed, 272 insertions(+), 33 deletions(-) diff --git a/src/bin/evas/evas_cserve2_scale.c b/src/bin/evas/evas_cserve2_scale.c index dd909db..bf6e940 100644 --- a/src/bin/evas/evas_cserve2_scale.c +++ b/src/bin/evas/evas_cserve2_scale.c @@ -9,6 +9,7 @@ cserve2_scale_init(void) evas_common_image_init(); evas_common_convert_init(); evas_common_scale_init(); + evas_common_scale_sample_init(); } void diff --git a/src/lib/evas/common/evas_draw_main.c b/src/lib/evas/common/evas_draw_main.c index 5d1e415..ed1f951 100644 --- a/src/lib/evas/common/evas_draw_main.c +++ b/src/lib/evas/common/evas_draw_main.c @@ -45,6 +45,7 @@ evas_common_init(void) evas_common_image_init(); evas_common_convert_init(); evas_common_scale_init(); + evas_common_scale_sample_init(); evas_common_rectangle_init(); evas_common_polygon_init(); evas_common_line_init(); @@ -60,6 +61,7 @@ evas_common_shutdown(void) evas_font_dir_cache_free(); evas_common_image_cache_free(); + 
evas_common_scale_sample_shutdown(); } EAPI void diff --git a/src/lib/evas/common/evas_scale_main.h b/src/lib/evas/common/evas_scale_main.h index c0e1f94..76c46c8 100644 --- a/src/lib/evas/common/evas_scale_main.h +++ b/src/lib/evas/common/evas_scale_main.h @@ -4,6 +4,8 @@ typedef Eina_Bool (*Evas_Common_Scale_In_To_Out_Clip_Cb)(RGBA_Image *src, RGBA_Image *dst, RGBA_Draw_Context *dc, int src_region_x, int src_region_y, int src_region_w, int src_region_h, int dst_region_x, int dst_region_y, int dst_region_w, int dst_region_h); EAPI void evas_common_scale_init (void); +EAPI void evas_common_scale_sample_init (void); +EAPI void evas_common_scale_sample_shutdown (void); EAPI Eina_Bool evas_common_scale_rgba_in_to_out_clip_cb (RGBA_Image *src, RGBA_Image *dst, RGBA_Draw_Context *dc, int src_region_x, int src_region_y, int src_region_w, int src_region_h, int dst_region_x, int dst_region_y, int dst_region_w, int dst_region_h, Evas_Common_Scale_In_To_Out_Clip_Cb cb); EAPI Eina_Bool evas_common_scale_rgba_in_to_out_clip_smooth (RGBA_Image *src, RGBA_Image *dst, RGBA_Draw_Context *dc, int src_region_x, int src_region_y, int src_region_w, int src_region_h, int dst_region_x, int dst_region_y, int dst_region_w, int dst_region_h); diff --git a/src/lib/evas/common/evas_scale_sample.c b/src/lib/evas/common/evas_scale_sample.c index 940ccba..09a0631 100644 --- a/src/lib/evas/common/evas_scale_sample.c +++ b/src/lib/evas/common/evas_scale_sample.c @@ -3,6 +3,42 @@ static Eina_Bool scale_rgba_in_to_out_clip_sample_internal(RGBA_Image *src, RGBA_Image *dst, RGBA_Draw_Context *dc, int src_region_x, int src_region_y, int src_region_w, int src_region_h, int dst_region_x, int dst_region_y, int dst_region_w, int dst_region_h); +typedef struct _Evas_Scale_Thread Evas_Scale_Thread; +typedef struct _Evas_Scale_Msg Evas_Scale_Msg; + +struct _Evas_Scale_Msg +{ + Eina_Thread_Queue_Msg head; + Evas_Scale_Thread *task; +}; + +struct _Evas_Scale_Thread +{ + RGBA_Image *mask8; + DATA32 **row_ptr; + 
DATA32 *dptr; + int *lin_ptr; + + RGBA_Gfx_Func func; + RGBA_Gfx_Func func2; + + int dst_clip_x; + int dst_clip_y; + int dst_clip_h; + int dst_clip_w; + int dst_w; + + int mask_x; + int mask_y; + + unsigned int mul_col; +}; + +static Eina_Bool use_thread = EINA_FALSE; +static Eina_Thread scaling_thread; +static Eina_Thread_Queue *thread_queue = NULL; +static Eina_Thread_Queue *main_queue = NULL; + EAPI Eina_Bool evas_common_scale_rgba_in_to_out_clip_sample(RGBA_Image *src, RGBA_Image *dst, RGBA_Draw_Context *dc, @@ -58,6 +94,80 @@ evas_common_scale_rgba_in_to_out_clip_sample_do(const Cutout_Rects *reuse, } } +static void +_evas_common_scale_rgba_sample_scale_nomask(int y, + int dst_clip_w, int dst_clip_h, int dst_w, + DATA32 **row_ptr, int *lin_ptr, + DATA32 *dptr, RGBA_Gfx_Func func, unsigned int mul_col) +{ + DATA32 *buf, *dst_ptr; + int x; + + /* a scanline buffer */ + buf = alloca(dst_clip_w * sizeof(DATA32)); + + dptr = dptr + dst_w * y; + for (; y < dst_clip_h; y++) + { + dst_ptr = buf; + for (x = 0; x < dst_clip_w; x++) + { + DATA32 *ptr; + + ptr = row_ptr[y] + lin_ptr[x]; + *dst_ptr = *ptr; + dst_ptr++; + } + + /* * blend here [clip_w *] buf -> dptr * */ + func(buf, NULL, mul_col, dptr, dst_clip_w); + + dptr += dst_w; + } +} + +static void +_evas_common_scale_rgba_sample_scale_mask(int y, + int dst_clip_x, int dst_clip_y, + int dst_clip_w, int dst_clip_h, int dst_w, + int mask_x, int mask_y, + DATA32 **row_ptr, int *lin_ptr, RGBA_Image *im, + DATA32 *dptr, RGBA_Gfx_Func func, RGBA_Gfx_Func func2, + unsigned int mul_col) +{ + DATA32 *buf, *dst_ptr; + int x; + + /* a scanline buffer */ + buf = alloca(dst_clip_w * sizeof(DATA32)); + + dptr = dptr + dst_w * y; + for (; y < dst_clip_h; y++) + { + DATA8 *mask; + + dst_ptr = buf; + mask = im->image.data8 + + ((dst_clip_y - mask_y + y) * im->cache_entry.w) + + (dst_clip_x - mask_x); + + for (x = 0; x < dst_clip_w; x++) + { + DATA32 *ptr; + + ptr = row_ptr[y] + lin_ptr[x]; + *dst_ptr = *ptr; + dst_ptr++; + } + + /* 
* blend here [clip_w *] buf -> dptr * */ + if (mul_col != 0xFFFFFFFF) func2(buf, NULL, mul_col, buf, dst_clip_w); + func(buf, mask, 0, dptr, dst_clip_w); + + dptr += dst_w; + } +} + EAPI void evas_common_scale_rgba_sample_draw(RGBA_Image *src, RGBA_Image *dst, int dst_clip_x, int dst_clip_y, int dst_clip_w, int dst_clip_h, DATA32 mul_col, int render_op, int src_region_x, int src_region_y, int src_region_w, int src_region_h, int dst_region_x, int dst_region_y, int dst_region_w, int dst_region_h, RGBA_Image *mask_ie, int mask_x, int mask_y) { @@ -587,51 +697,83 @@ scale_rgba_in_to_out_clip_sample_internal(RGBA_Image *src, RGBA_Image *dst, else #endif { + unsigned int mul_col; + /* a scanline buffer */ buf = alloca(dst_clip_w * sizeof(DATA32)); - /* image masking */ - if (dc->clip.mask) - { - RGBA_Image *im = dc->clip.mask; + mul_col = dc->mul.use ? dc->mul.col : 0xFFFFFFFF; - for (y = 0; y < dst_clip_h; y++) + /* do we have enough data to start some additional thread ? */ + if (use_thread && dst_clip_h > 32 && dst_clip_w * dst_clip_h > 4096) + { + /* Yes, we do ! 
*/ + Evas_Scale_Msg *msg; + void *ref; + Evas_Scale_Thread local; + + local.mask8 = dc->clip.mask; + local.row_ptr = row_ptr; + local.dptr = dptr; + local.lin_ptr = lin_ptr; + local.func = func; + local.func2 = func2; + local.dst_clip_x = dst_clip_x; + local.dst_clip_y = dst_clip_y; + local.dst_clip_h = dst_clip_h; + local.dst_clip_w = dst_clip_w; + local.dst_w = dst_w; + local.mask_x = dc->clip.mask_x; + local.mask_y = dc->clip.mask_y; + local.mul_col = mul_col; + + msg = eina_thread_queue_send(thread_queue, sizeof (Evas_Scale_Msg), &ref); + msg->task = &local; + eina_thread_queue_send_done(thread_queue, ref); + + /* image masking */ + if (dc->clip.mask) { - dst_ptr = buf; - mask = im->image.data8 - + ((dst_clip_y - dc->clip.mask_y + y) * im->cache_entry.w) - + (dst_clip_x - dc->clip.mask_x); - - for (x = 0; x < dst_clip_w; x++) - { - ptr = row_ptr[y] + lin_ptr[x]; - *dst_ptr = *ptr; - dst_ptr++; - } - - /* * blend here [clip_w *] buf -> dptr * */ - if (dc->mul.use) func2(buf, NULL, dc->mul.col, buf, dst_clip_w); - func(buf, mask, 0, dptr, dst_clip_w); + _evas_common_scale_rgba_sample_scale_mask(0, + dst_clip_x, dst_clip_y, + dst_clip_w, dst_clip_h >> 1, dst_w, + dc->clip.mask_x, dc->clip.mask_y, + row_ptr, lin_ptr, dc->clip.mask, + dptr, func, func2, mul_col); - dptr += dst_w; } + else + { + _evas_common_scale_rgba_sample_scale_nomask(0, + dst_clip_w, dst_clip_h >> 1, dst_w, + row_ptr, lin_ptr, + dptr, func, mul_col); + } + + msg = eina_thread_queue_wait(main_queue, &ref); + if (msg) eina_thread_queue_wait_done(main_queue, ref); } else { - for (y = 0; y < dst_clip_h; y++) - { - dst_ptr = buf; - for (x = 0; x < dst_clip_w; x++) - { - ptr = row_ptr[y] + lin_ptr[x]; - *dst_ptr = *ptr; - dst_ptr++; - } + /* No we don't ! 
*/ - /* * blend here [clip_w *] buf -> dptr * */ - func(buf, NULL, dc->mul.col, dptr, dst_clip_w); + /* image masking */ + if (dc->clip.mask) + { + _evas_common_scale_rgba_sample_scale_mask(0, + dst_clip_x, dst_clip_y, + dst_clip_w, dst_clip_h, dst_w, + dc->clip.mask_x, dc->clip.mask_y, + row_ptr, lin_ptr, dc->clip.mask, + dptr, func, func2, mul_col); - dptr += dst_w; + } + else + { + _evas_common_scale_rgba_sample_scale_nomask(0, + dst_clip_w, dst_clip_h, dst_w, + row_ptr, lin_ptr, + dptr, func, mul_col); } } } @@ -639,3 +781,95 @@ scale_rgba_in_to_out_clip_sample_internal(RGBA_Image *src, RGBA_Image *dst, return EINA_TRUE; } + +static void * +_evas_common_scale_sample_thread(void *data EINA_UNUSED, + Eina_Thread t EINA_UNUSED) +{ + Evas_Scale_Msg *msg; + Evas_Scale_Thread *todo = NULL; + + do + { + void *ref; + + todo = NULL; + + msg = eina_thread_queue_wait(thread_queue, &ref); + if (msg) + { + int h; + + todo = msg->task; + eina_thread_queue_wait_done(thread_queue, &ref); + + if (!todo) goto end; + + h = todo->dst_clip_h >> 1; + + if (todo->mask8) + _evas_common_scale_rgba_sample_scale_mask(h, + todo->dst_clip_x, todo->dst_clip_y, + todo->dst_clip_w, todo->dst_clip_h, + todo->dst_w, + todo->mask_x, todo->mask_y, + todo->row_ptr, todo->lin_ptr, todo->mask8, + todo->dptr, todo->func, todo->func2, + todo->mul_col); + else + _evas_common_scale_rgba_sample_scale_nomask(h, + todo->dst_clip_w, todo->dst_clip_h, + todo->dst_w, + todo->row_ptr, todo->lin_ptr, + todo->dptr, todo->func, todo->mul_col); + } + + end: + msg = eina_thread_queue_send(main_queue, sizeof (Evas_Scale_Msg), &ref); + msg->task = NULL; + eina_thread_queue_send_done(main_queue, ref); + } + while (todo); + + return NULL; +} + +EAPI void +evas_common_scale_sample_init(void) +{ + if (eina_cpu_count() <= 2) return ; + + thread_queue = eina_thread_queue_new(); + main_queue = eina_thread_queue_new(); + + if (!eina_thread_create(&scaling_thread, EINA_THREAD_NORMAL, -1, + _evas_common_scale_sample_thread, 
NULL)) + { + return; + } + + use_thread = EINA_TRUE; +} + +EAPI void +evas_common_scale_sample_shutdown(void) +{ + Evas_Scale_Msg *msg; + void *ref; + + if (!use_thread) return ; + + msg = eina_thread_queue_send(thread_queue, sizeof (Evas_Scale_Msg), &ref); + msg->task = NULL; + eina_thread_queue_send_done(thread_queue, ref); + + /* Here is the thread commiting succide*/ + + msg = eina_thread_queue_wait(main_queue, &ref); + if (msg) eina_thread_queue_wait_done(main_queue, ref); + + eina_thread_join(scaling_thread); + + eina_thread_queue_free(thread_queue); + eina_thread_queue_free(main_queue); +} --