For the SRC compositing operator, the combine step in the generic "fetch -> combine -> writeback" pipeline may be redundant.
Some examples:

1. bilinear a8r8g8b8 -> a8r8g8b8: There is a redundant copy from the source iterator's temporary buffer directly to the destination buffer, whereas the data could be fetched directly into the destination.
2. bilinear a8r8g8b8 -> r5g6b5: There is a redundant copy from the source iterator's temporary buffer to the destination iterator's temporary buffer, whereas the data could be fetched directly into the destination iterator's temporary buffer.
3. a8r8g8b8 -> r5g6b5: The source noop iterator iterates over the source image and returns pointers to scanlines instead of copying, but this data then gets copied to the temporary buffer of the destination iterator instead of being written back directly, with conversion, from the scanline pointers returned by the source iterator.
4. horizontal linear gradient -> r5g6b5: One more example of the source noop iterator. The behavior is similar to the "a8r8g8b8 -> r5g6b5" conversion.

This problem can be resolved by allowing the source iterators to fetch data into externally provided buffers and, in a similar way, allowing the destination iterators to write back from externally provided buffers. It is up to the iterator implementation to decide whether such a shortcut can be safely supported. If using the external buffer can be supported, the iterator may implement the new "offer_buffer" method.
V2: - Add a new iterator method call to provide buffers to iterators instead of using the pointer to pointer hacks - Also support a shortcut for writeback in addition to fetch --- pixman/pixman-general.c | 61 +++++++++++++++++++++++++++++++++++++----- pixman/pixman-implementation.c | 1 + pixman/pixman-private.h | 25 +++++++++++++++++ pixman/pixman-utils.c | 14 ++++++++++ 4 files changed, 94 insertions(+), 7 deletions(-) diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c index 6310bff..6a7434b 100644 --- a/pixman/pixman-general.c +++ b/pixman/pixman-general.c @@ -195,17 +195,64 @@ general_composite_rect (pixman_implementation_t *imp, compose = _pixman_implementation_lookup_combiner ( imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); - for (i = 0; i < height; ++i) + if (op == PIXMAN_OP_SRC && !mask_image && src_iter.offer_buffer) { - uint32_t *s, *m, *d; + /* Avoid memcpy combiner by supporting direct fetch to the destination + * (either to the destination image directly or to the temporary buffer + * of the destination iterator). This works fine if the source iterator + * actually produces data for each scanline. + */ + for (i = 0; i < height; ++i) + { + uint32_t *s, *m, *d; + + m = mask_iter.get_scanline (&mask_iter, NULL); + d = dest_iter.get_scanline (&dest_iter, NULL); + src_iter.offer_buffer (&src_iter, d); + s = src_iter.get_scanline (&src_iter, m); + + if (s != d) + compose (imp->toplevel, op, d, s, m, width); + + dest_iter.write_back (&dest_iter); + } + } + else if (op == PIXMAN_OP_SRC && !mask_image && dest_iter.offer_buffer) + { + /* We still can support writeback to the destination directly from the + * pointers, returned by the noop source iterators. This is useful when + * iterating over the source images in native a8r8g8b8 format and for + * some solids/gradients. 
+ */ + for (i = 0; i < height; ++i) + { + uint32_t *s, *m, *d; + + m = mask_iter.get_scanline (&mask_iter, NULL); + s = src_iter.get_scanline (&src_iter, m); + dest_iter.offer_buffer (&dest_iter, s); + d = dest_iter.get_scanline (&dest_iter, NULL); + + if (s != d) + compose (imp->toplevel, op, d, s, m, width); + + dest_iter.write_back (&dest_iter); + } + } + else + { + for (i = 0; i < height; ++i) + { + uint32_t *s, *m, *d; - m = mask_iter.get_scanline (&mask_iter, NULL); - s = src_iter.get_scanline (&src_iter, m); - d = dest_iter.get_scanline (&dest_iter, NULL); + m = mask_iter.get_scanline (&mask_iter, NULL); + s = src_iter.get_scanline (&src_iter, m); + d = dest_iter.get_scanline (&dest_iter, NULL); - compose (imp->toplevel, op, d, s, m, width); + compose (imp->toplevel, op, d, s, m, width); - dest_iter.write_back (&dest_iter); + dest_iter.write_back (&dest_iter); + } } if (src_iter.fini) diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c index 5884054..699ff10 100644 --- a/pixman/pixman-implementation.c +++ b/pixman/pixman-implementation.c @@ -314,6 +314,7 @@ _pixman_implementation_iter_init (pixman_implementation_t *imp, iter->iter_flags = iter_flags; iter->image_flags = image_flags; iter->fini = NULL; + iter->offer_buffer = NULL; if (!iter->image) { diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 9646605..7bbb5b1 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -207,6 +207,23 @@ union pixman_image }; typedef struct pixman_iter_t pixman_iter_t; +/* + * Using the "offer_buffer" method, the iterator may be supplied with a + * new buffer, which can override the original buffer initially provided + * to _pixman_implementation_iter_init() function. The caller typically + * expects that using this new buffer would be preferable for returning + * the scanline data on the next "get_scanline" call. 
Still that's only + * a hint and the iterator implementation is free to ignore the buffers + * provided by this method (either ignore all of them or selectively + * accept/ignore buffers, based on buffer pointer alignment or other + * checks). The original buffer pointer, which was passed as an argument + * to the _pixman_implementation_iter_init() function, can be always used + * as a fallback solution. + * + * Note: in the case if the buffer is accepted, it can be only used until + * the next "offer_buffer" call or until the iterator destruction. + */ +typedef void (* pixman_iter_offer_buffer_t) (pixman_iter_t *iter, uint32_t *buffer); typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter); @@ -257,6 +274,7 @@ struct pixman_iter_t pixman_iter_get_scanline_t get_scanline; pixman_iter_write_back_t write_back; pixman_iter_fini_t fini; + pixman_iter_offer_buffer_t offer_buffer; /* These fields are scratch data that implementations can use */ void * data; @@ -655,8 +673,15 @@ uint32_t * _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); void +_pixman_iter_accept_buffer_offers (pixman_iter_t *iter, uint32_t *buffer); + +void _pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); +void +_pixman_iter_init_bits_stride_and_accept_buffer_offers (pixman_iter_t *iter, + const pixman_iter_info_t *info); + /* These "formats" all have depth 0, so they * will never clash with any real ones */ diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c index 98723a8..bee9a06 100644 --- a/pixman/pixman-utils.c +++ b/pixman/pixman-utils.c @@ -215,6 +215,12 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) } void +_pixman_iter_accept_buffer_offers (pixman_iter_t *iter, uint32_t *buffer) +{ + iter->buffer = buffer; +} + +void 
_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; @@ -225,6 +231,14 @@ _pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *in iter->stride = s; } +void +_pixman_iter_init_bits_stride_and_accept_buffer_offers (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + _pixman_iter_init_bits_stride (iter, info); + iter->offer_buffer = _pixman_iter_accept_buffer_offers; +} + #define N_TMP_BOXES (16) pixman_bool_t -- 1.8.1.5 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman