From: Siarhei Siamashka <siarhei.siamas...@nokia.com> Because scaling is done in a single pass without temporary buffers, it is a bit faster than the general path on x86 (and provides an even better speedup on MIPS and ARM).
Benchmark on Intel Core i7:

  Using cairo-perf-trace:
    before: image firefox-planet-gnome 12.566 12.610 0.23% 6/6
    after: image firefox-planet-gnome 12.019 12.054 0.15% 5/6

  Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
    before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
    after: op=1, src=20028888, dst=20028888, speed=82.61 MPix/s

Benchmark on ARM Cortex-A8:

  Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
    before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
    after: op=1, src=20028888, dst=20028888, speed=10.72 MPix/s

Benchmark on MIPS 24K:

  Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
    before: op=1, src=20028888, dst=20028888, speed=5.12 MPix/s
    after: op=1, src=20028888, dst=20028888, speed=6.96 MPix/s

  Microbenchmark (scaling 500x500 image with scale factor close to 1x):
    before: op=1, src=20028888, dst=20028888, speed=5.26 MPix/s
    after: op=1, src=20028888, dst=20028888, speed=7.00 MPix/s
---
 pixman/pixman-fast-path.c | 144 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 144 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 92f0308..1e3094e 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1458,6 +1458,143 @@
 FAST_NEAREST_MAINLOOP (565_565_pad_SRC, uint16_t, uint16_t, PAD)
 
 static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr, /* blends four packed 32-bit pixels using the weights distx, wt and wb */
+                        uint32_t bl, uint32_t br,
+                        int distx, int wt, int wb)
+{
+#if SIZEOF_LONG > 4 /* variant using 64-bit arithmetic: two channels per multiply */
+    uint64_t distxy, distxiy, distixy, distixiy;
+    uint64_t tl64, tr64, bl64, br64;
+    uint64_t f, r;
+
+    distxy = distx * wb; /* weight applied to br */
+    distxiy = distx * wt; /* weight applied to tr */
+    distixy = wb * (256 - distx); /* weight applied to bl */
+    distixiy = (256 - distx) * wt; /* weight applied to tl */
+
+    /* Alpha and Blue */
+    tl64 = tl & 0xff0000ff;
+    tr64 = tr & 0xff0000ff;
+    bl64 = bl & 0xff0000ff;
+    br64 = br & 0xff0000ff;
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r = f & 0x0000ff0000ff0000ull; /* keeps bits 16-23 and 40-47 of the weighted sum; NOTE(review): this placement presumably relies on wt + wb == 256 so that the four weights total 65536 - confirm with the caller */
+
+    /* Red and Green */
+    tl64 = tl;
+    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); /* bits 16-23 move up to 32-39, bits 8-15 stay in place */
+
+    tr64 = tr;
+    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+    bl64 = bl;
+    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+    br64 = br;
+    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); /* merges the two remaining result bytes into r */
+
+    return (uint32_t)(r >> 16); /* final shift drops the low 16 bits and leaves the 32-bit result */
+#else /* variant using 32-bit arithmetic: one channel per multiply */
+    int distxy, distxiy, distixy, distixiy;
+    uint32_t f, r;
+
+    distxy = distx * wb; /* weight applied to br */
+    distxiy = distx * wt; /* weight applied to tr */
+    distixy = wb * (256 - distx); /* weight applied to bl */
+    distixiy = (256 - distx) * wt; /* weight applied to tl */
+
+    /* Blue */
+    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+
+    /* Green */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    tl >>= 16; /* the four inputs are shifted so the same masks now select the upper two channels */
+    tr >>= 16;
+    bl >>= 16;
+    br >>= 16;
+    r >>= 16; /* moves the Blue and Green results down by 16 bits before Red and Alpha are merged */
+
+    /* Red */
+    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+    r |= f & 0x00ff0000;
+
+    /* Alpha */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    return r;
+#endif
+}
+
+static void
+bilinear_interpolate_line (uint32_t * buffer, /* fills buffer with width pixels interpolated from top_row and bottom_row */
+                           const uint32_t * top_row,
+                           const uint32_t * bottom_row,
+                           int wt,
+                           int wb,
+                           pixman_fixed_t x,
+                           pixman_fixed_t ux,
+                           int width)
+{
+    while (--width >= 0) /* one iteration per output pixel; the body is skipped when width <= 0 */
+    {
+        uint32_t tl, tr, bl, br;
+        int distx;
+
+        tl = top_row [pixman_fixed_to_int (x)];
+        tr = top_row [pixman_fixed_to_int (x) + 1]; /* right neighbour: one element past the index used for tl */
+        bl = bottom_row [pixman_fixed_to_int (x)];
+        br = bottom_row [pixman_fixed_to_int (x) + 1]; /* same index pair, read from the bottom row */
+
+        distx = (x >> 8) & 0xff; /* bits 8-15 of x become the horizontal weight */
+
+        *buffer++ = bilinear_interpolation (tl, tr, bl, br, distx, wt, wb);
+
+        x += ux; /* step the source position by ux for the next output pixel */
+    }
+}
+
+static force_inline void
+scaled_bilinear_scanline_8888_8888_SRC (uint32_t * dst, /* forwards to bilinear_interpolate_line; mask, max_vx and zero_src are not referenced in the body */
+                                        const uint32_t * mask,
+                                        const uint32_t * src_top,
+                                        const uint32_t * src_bottom,
+                                        int32_t w,
+                                        int wt,
+                                        int wb,
+                                        pixman_fixed_t vx,
+                                        pixman_fixed_t unit_x,
+                                        pixman_fixed_t max_vx,
+                                        pixman_bool_t zero_src)
+{
+    bilinear_interpolate_line (dst, src_top, src_bottom,
+                               wt, wb, vx, unit_x, w);
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_SRC,
+                               scaled_bilinear_scanline_8888_8888_SRC,
+                               uint32_t, uint32_t, uint32_t,
+                               COVER, FALSE, FALSE) /* NOTE(review): FAST_BILINEAR_MAINLOOP_COMMON is defined outside this patch; presumably it generates the main loop that calls the scanline function - confirm */
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_SRC,
+                               scaled_bilinear_scanline_8888_8888_SRC,
+                               uint32_t, uint32_t, uint32_t,
+                               PAD, FALSE, FALSE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_SRC,
+                               scaled_bilinear_scanline_8888_8888_SRC,
+                               uint32_t, uint32_t, uint32_t,
+                               NONE, FALSE, FALSE)
+
+static force_inline uint32_t
 fetch_nearest (pixman_repeat_t src_repeat,
                pixman_format_code_t format,
                uint32_t *src, int x, int src_width)
@@ -1973,6 +2110,13 @@ static const pixman_fast_path_t c_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
 
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), /* NOTE(review): SIMPLE_BILINEAR_FAST_PATH is defined outside this patch; all six entries name the same 8888_8888 functions - confirm the expansion */
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
+
+
 #define NEAREST_FAST_PATH(op,s,d) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
-- 
1.7.3.4

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman