Before After Mean StdDev Mean StdDev Confidence Change L1 68.5 1.0 116.3 0.6 100.0% +69.8% L2 31.1 1.8 60.9 5.0 100.0% +96.1% M 33.6 0.1 86.4 0.4 100.0% +157.0% HT 19.1 0.1 35.3 0.4 100.0% +84.3% VT 17.7 0.2 32.1 0.3 100.0% +81.3% R 17.5 0.2 29.9 0.3 100.0% +70.7% RT 7.0 0.1 11.8 0.3 100.0% +68.4%
Trimmed cairo-pref-traces does not show any significant change for this patch, reflecting the fact that src_x888_0565 is not used in the traces. --- pixman/pixman-arm-simd-asm.S | 77 ++++++++++++++++++++++++++++++++++++++++++ pixman/pixman-arm-simd.c | 7 ++++ 2 files changed, 84 insertions(+) diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S index 5a99264..d790cd8 100644 --- a/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman-arm-simd-asm.S @@ -303,6 +303,83 @@ generate_composite_function \ /******************************************************************************/ +.macro src_x888_0565_init + /* Hold loop invariant in MASK */ + ldr MASK, =0x001F001F + line_saved_regs STRIDE_S, ORIG_W +.endm + +.macro src_x888_0565_1pixel s, d + and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb + and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000 + orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb + orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb + /* Top 16 bits are discarded during the following STRH */ +.endm + +.macro src_x888_0565_2pixels slo, shi, d, tmp + and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000 + and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB + and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb + orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB + orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB + and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000 + orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb + orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb + pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb +.endm + +.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_S + WK5 .req STRIDE_M + WK6 .req WK3 + WK7 .req ORIG_W + .if numbytes == 16 + pixld , 16, 4, SRC, 0 + src_x888_0565_2pixels 4, 5, 0, 0 + pixld , 8, 4, SRC, 0 + src_x888_0565_2pixels 6, 7, 1, 1 + pixld , 8, 6, SRC, 0 + .else + pixld , numbytes*2, 4, SRC, 0 + .endif +.endm + +.macro src_x888_0565_process_tail cond, numbytes, firstreg + .if numbytes == 16 + src_x888_0565_2pixels 4, 5, 2, 2 + src_x888_0565_2pixels 6, 7, 3, 4 + .elseif numbytes == 8 + src_x888_0565_2pixels 4, 5, 1, 1 + src_x888_0565_2pixels 6, 7, 2, 2 + .elseif numbytes == 4 + src_x888_0565_2pixels 4, 5, 1, 1 + .else + src_x888_0565_1pixel 4, 1 + .endif + .if numbytes == 16 + pixst , numbytes, 0, DST + .else + pixst , numbytes, 1, DST + .endif + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_x888_0565_asm_armv6, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ + 3, /* prefetch distance */ \ + src_x888_0565_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + src_x888_0565_process_head, \ + src_x888_0565_process_tail + +/******************************************************************************/ + .macro add_8_8_8pixels cond, dst1, dst2 uqadd8&cond WK&dst1, WK&dst1, MASK uqadd8&cond WK&dst2, WK&dst2, STRIDE_M diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c index dd11838..b2cb859 100644 --- a/pixman/pixman-arm-simd.c +++ b/pixman/pixman-arm-simd.c @@ -41,6 +41,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888, uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_0565, + uint32_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, uint8_t, 1, uint8_t, 1) @@ -228,6 +230,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888), PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), -- 1.7.10.4 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman