From: Siarhei Siamashka <siarhei.siamas...@nokia.com>

Because this fast path does the scaling in a single pass without temporary
buffers, it is a bit faster than the general path on x86 (and provides an
even better speedup on MIPS and ARM).

Benchmark on Intel Core i7:
 Using cairo-perf-trace:
  before: image        firefox-planet-gnome   12.566   12.610   0.23%    6/6
  after:  image        firefox-planet-gnome   12.019   12.054   0.15%    5/6

 Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
  before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
  after:  op=1, src=20028888, dst=20028888, speed=82.61 MPix/s

Benchmark on ARM Cortex-A8:
 Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
  before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
  after:  op=1, src=20028888, dst=20028888, speed=10.72 MPix/s

Benchmark on MIPS 24K:
 Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
  before: op=1, src=20028888, dst=20028888, speed=5.12 MPix/s
  after:  op=1, src=20028888, dst=20028888, speed=6.96 MPix/s

 Microbenchmark (scaling 500x500 image with scale factor close to 1x):
  before: op=1, src=20028888, dst=20028888, speed=5.26 MPix/s
  after:  op=1, src=20028888, dst=20028888, speed=7.00 MPix/s
---
 pixman/pixman-fast-path.c |  144 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 144 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 92f0308..1e3094e 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1458,6 +1458,143 @@ FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
                       uint16_t, uint16_t, PAD)
 
 static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+                       uint32_t bl, uint32_t br,
+                       int distx, int wt, int wb)
+{ /* Returns a weighted blend of four packed 4x8-bit pixels; results are read 16 bits above each channel (presumably wt + wb == 256 -- confirm with caller) */
+#if SIZEOF_LONG > 4 /* wide variant: two channels are accumulated per 64-bit expression */
+    uint64_t distxy, distxiy, distixy, distixiy;
+    uint64_t tl64, tr64, bl64, br64;
+    uint64_t f, r;
+
+    distxy = distx * wb; /* weight multiplied with br */
+    distxiy = distx * wt; /* weight multiplied with tr */
+    distixy = wb * (256 - distx); /* weight multiplied with bl */
+    distixiy = (256 - distx) * wt; /* weight multiplied with tl */
+
+    /* Alpha and Blue: keep bits 24-31 and 0-7 of each pixel; the weighted sum is read back from bits 40-47 and 16-23 */
+    tl64 = tl & 0xff0000ff;
+    tr64 = tr & 0xff0000ff;
+    bl64 = bl & 0xff0000ff;
+    br64 = br & 0xff0000ff;
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r = f & 0x0000ff0000ff0000ull;
+
+    /* Red and Green: red (bits 16-23) is moved up to bits 32-39 so it cannot collide with green (bits 8-15) during the multiply */
+    tl64 = tl;
+    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
+
+    tr64 = tr;
+    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+    bl64 = bl;
+    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+    br64 = br;
+    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); /* red lands in bits 32-39, green in bits 24-31 */
+
+    return (uint32_t)(r >> 16); /* final shift puts the four results back at bits 0-7, 8-15, 16-23 and 24-31 */
+#else
+    int distxy, distxiy, distixy, distixiy;
+    uint32_t f, r;
+
+    distxy = distx * wb; /* weight multiplied with br */
+    distxiy = distx * wt; /* weight multiplied with tr */
+    distixy = wb * (256 - distx); /* weight multiplied with bl */
+    distixiy = (256 - distx) * wt; /* weight multiplied with tl */
+
+    /* Blue: low byte of each pixel; the sum stays in r and is shifted down by 16 further below */
+    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
+
+    /* Green: bits 8-15 of each pixel; bits 24-31 of the sum are merged into r */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    tl >>= 16;
+    tr >>= 16;
+    bl >>= 16;
+    br >>= 16;
+    r >>= 16; /* blue and green now sit in bits 0-7 and 8-15 of r */
+
+    /* Red: low byte of each pixel after the shift by 16; bits 16-23 of the sum are merged into r */
+    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
+    r |= f & 0x00ff0000;
+
+    /* Alpha: bits 8-15 of each pixel after the shift by 16; bits 24-31 of the sum are merged into r */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    return r;
+#endif
+}
+
+static void
+bilinear_interpolate_line (uint32_t *       buffer,
+                          const uint32_t * top_row,
+                          const uint32_t * bottom_row,
+                          int              wt,
+                          int              wb,
+                          pixman_fixed_t   x,
+                          pixman_fixed_t   ux,
+                          int              width)
+{ /* Writes width pixels to buffer; each is blended from two adjacent pixels of top_row and of bottom_row, x advancing by ux per pixel */
+    while (--width >= 0)
+    {
+       uint32_t tl, tr, bl, br;
+       int distx;
+
+       tl = top_row [pixman_fixed_to_int (x)];
+       tr = top_row [pixman_fixed_to_int (x) + 1]; /* reads one element beyond the integer position; nothing here checks the bound, so the caller must keep it valid */
+       bl = bottom_row [pixman_fixed_to_int (x)];
+       br = bottom_row [pixman_fixed_to_int (x) + 1];
+
+       distx = (x >> 8) & 0xff; /* 8-bit horizontal weight from bits 8-15 of x (presumably the top fraction bits of a 16.16 value -- confirm) */
+
+       *buffer++ = bilinear_interpolation (tl, tr, bl, br, distx, wt, wb);
+
+       x += ux;
+    }
+}
+
+static force_inline void
+scaled_bilinear_scanline_8888_8888_SRC (uint32_t *       dst,
+                                       const uint32_t * mask,
+                                       const uint32_t * src_top,
+                                       const uint32_t * src_bottom,
+                                       int32_t          w,
+                                       int              wt,
+                                       int              wb,
+                                       pixman_fixed_t   vx,
+                                       pixman_fixed_t   unit_x,
+                                       pixman_fixed_t   max_vx,
+                                       pixman_bool_t    zero_src)
+{ /* Forwards to bilinear_interpolate_line; mask, max_vx and zero_src are accepted but not used in this body */
+    bilinear_interpolate_line (dst, src_top, src_bottom,
+                              wt, wb, vx, unit_x, w);
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_SRC,
+                              scaled_bilinear_scanline_8888_8888_SRC,
+                              uint32_t, uint32_t, uint32_t,
+                              COVER, FALSE, FALSE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_SRC,
+                              scaled_bilinear_scanline_8888_8888_SRC,
+                              uint32_t, uint32_t, uint32_t,
+                              PAD, FALSE, FALSE)
+FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_SRC,
+                              scaled_bilinear_scanline_8888_8888_SRC,
+                              uint32_t, uint32_t, uint32_t,
+                              NONE, FALSE, FALSE)
+
+static force_inline uint32_t
 fetch_nearest (pixman_repeat_t src_repeat,
               pixman_format_code_t format,
               uint32_t *src, int x, int src_width)
@@ -1973,6 +2110,13 @@ static const pixman_fast_path_t c_fast_paths[] =
 
     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
 
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
+
 #define NEAREST_FAST_PATH(op,s,d)              \
     {   PIXMAN_OP_ ## op,                      \
        PIXMAN_ ## s, SCALED_NEAREST_FLAGS,     \
-- 
1.7.3.4

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to