Sorry for the confusion, but I thought the "tiny" patch should be sent before my completed (ver.4) patch, so that any regression that could happen with the larger patch would be detected.
If the patch (ver.4) is no problem, please ignore this (v3.1) patch. On 18 April 2016 at 18:58, Siarhei Siamashka <siarhei.siamas...@gmail.com> wrote: > On Thu, 14 Apr 2016 22:20:24 +0900 > Mizuki Asakura <ed6e1...@gmail.com> wrote: > >> Since aarch64 has different neon syntax from aarch32 and has no >> support for (older) arm-simd, >> there are no SIMD accelerations for pixman on aarch64. >> >> We need new implementations. >> >> This patch only contains FAST_PATH codes, not bilinear optimizations codes. >> After completing optimization this patch, bilinear related codes should be >> done. >> >> >> This patch contains additional optimization from my previous patch >> to omit using unnecessary register movings. > > If I understand it correctly, your patch removes the differences > between the 32-bit (*) and the 64-bit assembly code variants for > the instructions where the barrel shifter argument is in use. > > (*) Assuming that the assembly syntax differences are addressed via > https://lists.freedesktop.org/archives/pixman/2016-April/004489.html > > It's hard to review incremental patches like this. For example, > we can't see if all of the occurrences of these instructions are > really fixed and none are left out. A side by side comparison of > the final 64-bit assembly code with the existing 32-bit assembly > code still needs to be used. 
> >> Added: https://bugs.freedesktop.org/show_bug.cgi?id=94758 >> Signed-off-by: Mizuki Asakura <ed6e117f at gmail.com> >> --- >> diff -ruNp a/pixman/pixman/pixman-arma64-neon-asm.S >> b/pixman/pixman/pixman-arma64-neon-asm.S >> --- a/pixman/pixman/pixman-arma64-neon-asm.S 2016-04-14 >> 22:09:47.120752451 +0900 >> +++ b/pixman/pixman/pixman-arma64-neon-asm.S 2016-04-14 >> 22:06:45.092222137 +0900 >> @@ -3132,8 +3132,7 @@ generate_composite_function_nearest_scan >> .macro bilinear_load_8888 reg1, reg2, tmp >> asr TMP1, X, #16 >> add X, X, UX >> - lsl TMP2, TMP1, #2 >> - add TMP1, TOP, TMP2 >> + add TMP1, TOP, TMP1, lsl #2 >> ld1 {&reg1&.2s}, [TMP1], STRIDE >> ld1 {&reg2&.2s}, [TMP1] >> .endm >> @@ -3141,8 +3140,7 @@ generate_composite_function_nearest_scan >> .macro bilinear_load_0565 reg1, reg2, tmp >> asr TMP1, X, #16 >> add X, X, UX >> - lsl TMP2, TMP1, #1 >> - add TMP1, TOP, TMP2 >> + add TMP1, TOP, TMP1, lsl #1 >> ld1 {&reg2&.s}[0], [TMP1], STRIDE >> ld1 {&reg2&.s}[1], [TMP1] >> convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp >> diff -ruNp 160407/pixman/pixman/pixman-arma64-neon-asm.h >> 160408/pixman/pixman/pixman-arma64-neon-asm.h >> --- a/pixman/pixman/pixman-arma64-neon-asm.h 2016-04-14 >> 22:09:47.080752305 +0900 >> +++ b/pixman/pixman/pixman-arma64-neon-asm.h 2016-04-14 >> 22:06:45.044222036 +0900 >> @@ -231,16 +231,14 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP1, #1 >> - add TMP1, mem_operand, DUMMY >> + add TMP1, mem_operand, TMP1, lsl #1 >> asr TMP2, VX, #16 >> adds VX, VX, UNIT_X >> bmi 55f >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP2, #1 >> - add TMP2, mem_operand, DUMMY >> + add TMP2, mem_operand, TMP2, lsl #1 >> ld1 {v&reg1&.h}[0], [TMP1] >> asr TMP1, VX, #16 >> adds VX, VX, UNIT_X >> @@ -248,8 +246,7 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP1, #1 >> - add TMP1, mem_operand, DUMMY >> + add TMP1, mem_operand, TMP1, lsl #1 >> ld1 {v&reg1&.h}[1], [TMP2] >> asr TMP2, VX, 
#16 >> adds VX, VX, UNIT_X >> @@ -257,8 +254,7 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP2, #1 >> - add TMP2, mem_operand, DUMMY >> + add TMP2, mem_operand, TMP2, lsl #1 >> ld1 {v&reg1&.h}[2], [TMP1] >> ld1 {v&reg1&.h}[3], [TMP2] >> .elseif elem_size == 32 >> @@ -268,16 +264,14 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP1, #2 >> - add TMP1, mem_operand, DUMMY >> + add TMP1, mem_operand, TMP1, lsl #2 >> asr TMP2, VX, #16 >> adds VX, VX, UNIT_X >> bmi 55f >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP2, #2 >> - add TMP2, mem_operand, DUMMY >> + add TMP2, mem_operand, TMP2, lsl #2 >> ld1 {v&reg1&.s}[0], [TMP1] >> ld1 {v&reg1&.s}[1], [TMP2] >> .else >> @@ -317,8 +311,7 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP1, #1 >> - add TMP1, mem_operand, DUMMY >> + add TMP1, mem_operand, TMP1, lsl #1 >> ld1 {v&reg1&.h}[idx], [TMP1] >> .elseif elem_size == 32 >> asr DUMMY, VX, #16 >> @@ -328,8 +321,7 @@ >> 5: subs VX, VX, SRC_WIDTH_FIXED >> bpl 5b >> 55: >> - lsl DUMMY, TMP1, #2 >> - add TMP1, mem_operand, DUMMY >> + add TMP1, mem_operand, TMP1, lsl #2 >> ld1 {v&reg1&.s}[idx], [TMP1] >> .endif >> .endm >> @@ -638,27 -630,21 @@ local skip1 >> */ >> .macro advance_to_next_scanline start_of_loop_label >> mov W, ORIG_W >> - lsl DUMMY, DST_STRIDE, #dst_bpp_shift >> - add DST_W, DST_W, DUMMY >> + add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift >> .if src_bpp != 0 >> - lsl DUMMY, SRC_STRIDE, #src_bpp_shift >> - add SRC, SRC, DUMMY >> + add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift >> .endif >> .if mask_bpp != 0 >> - lsl DUMMY, MASK_STRIDE, #mask_bpp_shift >> - add MASK, MASK, DUMMY >> + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift >> .endif >> .if (dst_w_bpp != 24) >> - lsl DUMMY, W, #dst_bpp_shift >> - sub DST_W, DST_W, DUMMY >> + sub DST_W, DST_W, W, lsl #dst_bpp_shift >> .endif >> .if (src_bpp != 24) && (src_bpp != 0) >> - lsl DUMMY, W, #src_bpp_shift >> - sub SRC, SRC, DUMMY >> + 
sub SRC, SRC, W, lsl #src_bpp_shift >> .endif >> .if (mask_bpp != 24) && (mask_bpp != 0) >> - lsl DUMMY, W, #mask_bpp_shift >> - sub MASK, MASK, DUMMY >> + sub MASK, MASK, W, lsl #mask_bpp_shift >> .endif >> subs H, H, #1 >> mov DST_R, DST_W >> > > > > -- > Best regards, > Siarhei Siamashka _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman