This supports a1r5g5b5 source images. lowlevel-blt-bench results for src_1555_8888, which does not yet have a dedicated fast path:

        Before          After
      Mean StdDev     Mean StdDev   Confidence   Change
L1    24.5    0.2     57.0    1.1     100.0%     +132.2%
L2    19.3    0.4     41.4    1.0     100.0%     +114.3%
M     20.4    0.0     49.8    0.1     100.0%     +144.7%
HT    12.8    0.1     21.4    0.3     100.0%      +67.0%
VT    12.7    0.1     21.0    0.3     100.0%      +65.4%
R     12.1    0.1     19.7    0.2     100.0%      +63.1%
RT     5.6    0.1      7.0    0.2     100.0%      +24.8%
---
 pixman/pixman-arm-simd-asm.S |   70 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |   13 ++++++++
 2 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index b251187..6674a9d 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -2744,3 +2744,73 @@ generate_composite_function_single_scanline \
     inout_reverse_process_tail
 
 /******************************************************************************/
+
+.macro src_1555_8888_init
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x83E083E0
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        msr     CPSR_s, #0x50000
+.endm
+
+.macro src_1555_8888_2pixels  reg1, reg2, tmp1, tmp2, mask
+        bic     WK&reg2, WK&reg1, mask            @ 0RRRRR00000BBBBB0rrrrr00000bbbbb
+        and     tmp1, WK&reg1, mask               @ A00000GGGGG00000a00000ggggg00000
+        mov     tmp2, WK&reg2, lsr #16            @ 00000000000000000RRRRR00000BBBBB
+        orr     tmp1, tmp1, tmp1, lsr #5          @ A0000-GGGGGGGGGGa0000-gggggggggg
+        uxth    WK&reg2, WK&reg2                  @ 00000000000000000rrrrr00000bbbbb
+        mov     WK&reg1, tmp1, lsl #16            @ a0000-gggggggggg0000000000000000
+        orr     tmp2, tmp2, tmp2, lsl #5          @ 000000000000RRRRRRRRRRBBBBBBBBBB
+        orr     WK&reg2, WK&reg2, WK&reg2, lsl #5 @ 000000000000rrrrrrrrrrbbbbbbbbbb
+        mov     tmp1, tmp1, asr #10               @ AAAAAAAAAAA0000-GGGGGGGGGG------
+        pkhbt   tmp2, tmp2, tmp2, lsl #4          @ 00000000RRRRRRRR------BBBBBBBBBB
+        pkhbt   WK&reg2, WK&reg2, WK&reg2, lsl #4 @ 00000000rrrrrrrr------bbbbbbbbbb
+        mov     WK&reg1, WK&reg1, asr #10         @ aaaaaaaaaaa0000-gggggggggg000000
+        pkhtb   tmp2, tmp2, tmp2, asr #2          @ 00000000RRRRRRRR--------BBBBBBBB
+        pkhtb   WK&reg2, WK&reg2, WK&reg2, asr #2 @ 00000000rrrrrrrr--------bbbbbbbb
+        sel     WK&reg1, WK&reg2, WK&reg1         @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
+        sel     WK&reg2, tmp2, tmp1               @ AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB
+.endm
+
+.macro src_1555_8888_1pixel  reg, tmp, mask
+        bic     tmp, WK&reg, mask                 @ 00000000000000000rrrrr00000bbbbb
+        and     WK&reg, mask, WK&reg, lsl #16     @ a00000ggggg000000000000000000000
+        orr     tmp, tmp, tmp, lsl #5             @ 000000000000rrrrrrrrrrbbbbbbbbbb
+        orr     WK&reg, WK&reg, lsr #5            @ a0000-gggggggggg0000000000000000
+        pkhbt   tmp, tmp, tmp, lsl #4             @ 00000000rrrrrrrr------bbbbbbbbbb
+        mov     WK&reg, WK&reg, asr #10           @ aaaaaaaaaaa0000-gggggggggg000000
+        pkhtb   tmp, tmp, tmp, asr #2             @ 00000000rrrrrrrr--------bbbbbbbb
+        sel     WK&reg, tmp, WK&reg               @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
+.endm
+
+.macro src_1555_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ .if numbytes == 16
+        pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
+ .elseif numbytes == 8
+        pixld   , 4, firstreg, SRC, unaligned_src
+ .elseif numbytes == 4
+        pixld   , 2, firstreg, SRC, unaligned_src
+ .endif
+.endm
+
+.macro src_1555_8888_process_tail  cond, numbytes, firstreg
+ .if numbytes == 16
+        src_1555_8888_2pixels firstreg, %(firstreg+1), STRIDE_M, SCRATCH, MASK
+        src_1555_8888_2pixels %(firstreg+2), %(firstreg+3), STRIDE_M, SCRATCH, MASK
+ .elseif numbytes == 8
+        src_1555_8888_2pixels firstreg, %(firstreg+1), STRIDE_M, SCRATCH, MASK
+ .else
+        src_1555_8888_1pixel firstreg, SCRATCH, MASK
+ .endif
+.endm
+
+generate_composite_function_single_scanline \
+    pixman_get_scanline_a1r5g5b5_asm_armv6, 16, 0, 32, \
+    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
+    3, /* prefetch distance */ \
+    src_1555_8888_init, \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    src_1555_8888_process_head, \
+    src_1555_8888_process_tail
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 514231a..e6c5d81 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -111,6 +111,7 @@ PIXMAN_ARM_BIND_COMBINE_U (armv6, add)
 
 PIXMAN_ARM_BIND_GET_SCANLINE (armv6, r5g6b5)
 PIXMAN_ARM_BIND_WRITE_BACK (armv6, r5g6b5)
+PIXMAN_ARM_BIND_GET_SCANLINE (armv6, a1r5g5b5)
 PIXMAN_ARM_BIND_GET_SCANLINE (armv6, a8)
 
 void
@@ -357,6 +358,18 @@ static const pixman_iter_info_t arm_simd_iters[] =
       armv6_write_back_r5g6b5
     },
 
+    { PIXMAN_a1r5g5b5,
+      (FAST_PATH_STANDARD_FLAGS |
+       FAST_PATH_ID_TRANSFORM |
+       FAST_PATH_NEAREST_FILTER |
+       FAST_PATH_SAMPLES_COVER_CLIP_NEAREST |
+       FAST_PATH_BITS_IMAGE),
+      ITER_NARROW | ITER_SRC,
+      _pixman_iter_init_bits_stride,
+      armv6_get_scanline_a1r5g5b5,
+      NULL
+    },
+
     { PIXMAN_a8,
       (FAST_PATH_STANDARD_FLAGS |
        FAST_PATH_ID_TRANSFORM |
-- 
1.7.5.4

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman