On Friday 05 August 2005 05:59 pm, Mike Frysinger wrote: > find attached a PoC patch ive been toying with lately for asm_blend.S which > fixes each memory reference so that it is PIC aware. i can understand if > some people could care less if their library has textrel's in them and > would rather sacrifice a bit of slowdown at load time rather than slowdown > at runtime
i reviewed the stuff with some other Gentoo devs (Ned Ludd and Kevin F. Quinn) and the PaX team and we've come up with a much nicer patch which really shouldn't add significant overhead to the mmx routines :) the trick here is to go from doing: .data mVX000000: .byte 0, 0, 0, 0, 0, 0, 255, 127 m00XXXXXX: .byte 255, 255, 255, 255, 255, 255, 0, 0 .text movq mVX000000, %mm5 movq m00XXXXXX, %mm6 to this: .text pushl $0x7FFF0000 pushl $0x00000000 movq (%esp), %mm5 pushl $0x0000FFFF pushl $0xFFFFFFFF movq (%esp), %mm6 addl $16, %esp of course, these pretty little details are macro-ed out so in the source files you simply do: LOAD_IMMQ(mVX000000, %mm5) LOAD_IMMQ(m00XXXXXX, %mm6) CLEANUP_IMMQ_LOADS(2) -mike
Index: asm_blend.S =================================================================== RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_blend.S,v retrieving revision 1.3 diff -u -p -r1.3 asm_blend.S --- asm_blend.S 6 Aug 2005 20:30:27 -0000 1.3 +++ asm_blend.S 6 Aug 2005 23:47:11 -0000 @@ -85,21 +85,10 @@ .type PT_(imlib_mmx_reshade_copy_rgba_to_rgba,@function) .globl PR_(imlib_mmx_reshade_copy_rgb_to_rgba) .type PT_(imlib_mmx_reshade_copy_rgb_to_rgba,@function) - -/*\ Some useful masks \*/ -m0X000000: .byte 0, 0, 0, 0, 0, 0, 255, 0 -m10000000: .byte 0, 0, 0, 0, 0, 0, 0, 1 -m00XXXXXX: .byte 255, 255, 255, 255, 255, 255, 0, 0 -mVX000000: .byte 0, 0, 0, 0, 0, 0, 255, 127 -mV0000000: .byte 0, 0, 0, 0, 0, 0, 0, 128 -m0XXX0XXX: .byte 255, 255, 255, 0, 255, 255, 255, 0 -mX000X000: .byte 0, 0, 0, 255, 0, 0, 0, 255 -m10001000: .byte 0, 0, 0, 1, 0, 0, 0, 1 -m000V0V0V: .byte 127, 0, 127, 0, 127, 0, 0, 0 -mI0000000: .byte 0, 0, 0, 0, 0, 0, 0, 64 -m0VVV0VVV: .byte 127, 127, 127, 0, 127, 127, 127, 0 -c1: .word 0x1, 0x1, 0x1, 0x1 +#include "asm_loadimmq.S" + + /*\ MMX register use: |*| %mm1 = Source value |*| %mm2 = Destination value @@ -162,7 +151,8 @@ PR_(imlib_mmx_blend_rgba_to_rgb): ENTER pxor %mm4, %mm4 - movq c1, %mm5 + LOAD_IMMQ(c1, %mm5) + CLEANUP_IMMQ_LOADS(1) LOOP_START 1: @@ -218,9 +208,10 @@ PR_(imlib_mmx_blend_rgba_to_rgba): ENTER pxor %mm4, %mm4 - movq m0X000000, %mm5 - movq m00XXXXXX, %mm6 - movq c1, %mm7 + LOAD_IMMQ(m0X000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + LOAD_IMMQ(c1, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: @@ -272,8 +263,9 @@ SIZE(imlib_mmx_blend_rgba_to_rgba) PR_(imlib_mmx_copy_rgba_to_rgb): ENTER - movq m0XXX0XXX, %mm5 - movq mX000X000, %mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(mX000X000, %mm6) + CLEANUP_IMMQ_LOADS(2) /*\ Two at a time: last item is at %ecx = 0 \*/ subl $4, %esi @@ -342,7 +334,8 @@ SIZE(imlib_mmx_copy_rgba_to_rgba) PR_(imlib_mmx_copy_rgb_to_rgba): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + 
CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -415,8 +408,9 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba): ENTER pxor %mm4, %mm4 - movq mVX000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mVX000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ -463,7 +457,8 @@ SIZE(imlib_mmx_add_blend_rgba_to_rgba) PR_(imlib_mmx_add_copy_rgba_to_rgb): ENTER - movq m0XXX0XXX, %mm5 + LOAD_IMMQ(m0XXX0XXX, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -533,7 +528,8 @@ SIZE(imlib_mmx_add_copy_rgba_to_rgba) PR_(imlib_mmx_add_copy_rgb_to_rgba): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -613,8 +609,9 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb ENTER pxor %mm4, %mm4 - movq mV0000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mV0000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ -661,7 +658,8 @@ SIZE(imlib_mmx_subtract_blend_rgba_to_rg PR_(imlib_mmx_subtract_copy_rgba_to_rgb): ENTER - movq m0XXX0XXX, %mm5 + LOAD_IMMQ(m0XXX0XXX, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -699,7 +697,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb PR_(imlib_mmx_subtract_copy_rgba_to_rgba): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -741,7 +740,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb PR_(imlib_mmx_subtract_copy_rgb_to_rgba): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -780,7 +780,8 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb) ENTER pxor %mm4, %mm4 - movq m000V0V0V, %mm6 + LOAD_IMMQ(m000V0V0V, %mm6) + CLEANUP_IMMQ_LOADS(1) LOOP_START 1: @@ -823,9 +824,10 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba ENTER pxor %mm4, %mm4 - movq mI0000000, %mm5 - movq m000V0V0V, %mm6 - movq m00XXXXXX, %mm7 + LOAD_IMMQ(mI0000000, %mm5) + LOAD_IMMQ(m000V0V0V, %mm6) + LOAD_IMMQ(m00XXXXXX, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: 
@@ -875,8 +877,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb): ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, %mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + CLEANUP_IMMQ_LOADS(2) subl $4, %esi subl $4, %edi @@ -939,8 +942,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba) ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, %mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + CLEANUP_IMMQ_LOADS(2) subl $4, %esi subl $4, %edi @@ -1004,9 +1008,10 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba): ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, %mm6 - movq mX000X000, %mm7 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + LOAD_IMMQ(mX000X000, %mm7) + CLEANUP_IMMQ_LOADS(3) subl $4, %esi subl $4, %edi Index: asm_blend_cmod.S =================================================================== RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_blend_cmod.S,v retrieving revision 1.3 diff -u -p -r1.3 asm_blend_cmod.S --- asm_blend_cmod.S 6 Aug 2005 20:30:27 -0000 1.3 +++ asm_blend_cmod.S 6 Aug 2005 23:47:12 -0000 @@ -112,21 +112,9 @@ .type PT_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod,@function) .globl PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod) .type PT_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod,@function) - -/*\ Some useful masks \*/ -m0X000000: .byte 0, 0, 0, 0, 0, 0, 255, 0 -m10000000: .byte 0, 0, 0, 0, 0, 0, 0, 1 -m00XXXXXX: .byte 255, 255, 255, 255, 255, 255, 0, 0 -mVX000000: .byte 0, 0, 0, 0, 0, 0, 255, 127 -mV0000000: .byte 0, 0, 0, 0, 0, 0, 0, 128 -m0XXX0XXX: .byte 255, 255, 255, 0, 255, 255, 255, 0 -mX000X000: .byte 0, 0, 0, 255, 0, 0, 0, 255 -m10001000: .byte 0, 0, 0, 1, 0, 0, 0, 1 -m000V0V0V: .byte 127, 0, 127, 0, 127, 0, 0, 0 -mI0000000: .byte 0, 0, 0, 0, 0, 0, 0, 64 -m0VVV0VVV: .byte 127, 127, 127, 0, 127, 127, 127, 0 -c1: .word 0x1, 0x1, 0x1, 0x1 +#include "asm_loadimmq.S" + /*\ MMX register use: |*| %mm1 = Source value |*| %mm2 = Destination value @@ -364,7 +352,8 @@ 
PR_(imlib_mmx_blend_rgba_to_rgb_cmod): ENTER pxor %mm4, %mm4 - movq c1, %mm5 + LOAD_IMMQ(c1, %mm5) + CLEANUP_IMMQ_LOADS(1) LOOP_START 1: @@ -420,9 +409,10 @@ PR_(imlib_mmx_blend_rgba_to_rgba_cmod): ENTER pxor %mm4, %mm4 - movq m0X000000, %mm5 - movq m00XXXXXX, %mm6 - movq c1, %mm7 + LOAD_IMMQ(m0X000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + LOAD_IMMQ(c1, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: @@ -475,7 +465,8 @@ PR_(imlib_mmx_blend_rgb_to_rgb_cmod): ENTER pxor %mm4, %mm4 - movq c1, %mm5 + LOAD_IMMQ(c1, %mm5) + CLEANUP_IMMQ_LOADS(1) /*\ Load alpha beforehand, as it's always amap(0xff) \*/ movzbl amap_ff, %eax @@ -519,9 +510,10 @@ PR_(imlib_mmx_blend_rgb_to_rgba_cmod): ENTER pxor %mm4, %mm4 - movq m0X000000, %mm5 - movq m00XXXXXX, %mm6 - movq c1, %mm7 + LOAD_IMMQ(m0X000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + LOAD_IMMQ(c1, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: @@ -688,8 +680,9 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba_cmo ENTER pxor %mm4, %mm4 - movq mVX000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mVX000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ -778,8 +771,9 @@ PR_(imlib_mmx_add_blend_rgb_to_rgba_cmod ENTER pxor %mm4, %mm4 - movq mVX000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mVX000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ -826,7 +820,8 @@ SIZE(imlib_mmx_add_blend_rgb_to_rgba_cmo PR_(imlib_mmx_add_copy_rgba_to_rgb_cmod): ENTER - movq m0XXX0XXX, %mm5 + LOAD_IMMQ(m0XXX0XXX, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -970,8 +965,9 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb ENTER pxor %mm4, %mm4 - movq mV0000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mV0000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ -1060,8 +1056,9 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgba ENTER pxor %mm4, %mm4 - movq mV0000000, %mm5 - movq m00XXXXXX, %mm6 + LOAD_IMMQ(mV0000000, %mm5) + LOAD_IMMQ(m00XXXXXX, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START 1: @@ 
-1108,7 +1105,8 @@ SIZE(imlib_mmx_subtract_blend_rgb_to_rgb PR_(imlib_mmx_subtract_copy_rgba_to_rgb_cmod): ENTER - movq m0XXX0XXX, %mm5 + LOAD_IMMQ(m0XXX0XXX, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -1146,7 +1144,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb PR_(imlib_mmx_subtract_copy_rgba_to_rgba_cmod): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -1188,7 +1187,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb PR_(imlib_mmx_subtract_copy_rgb_to_rgba_cmod): ENTER - movq mX000X000, %mm5 + LOAD_IMMQ(mX000X000, %mm5) + CLEANUP_IMMQ_LOADS(1) subl $4, %esi subl $4, %edi @@ -1227,7 +1227,8 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb_ ENTER pxor %mm4, %mm4 - movq m000V0V0V, %mm6 + LOAD_IMMQ(m000V0V0V, %mm6) + CLEANUP_IMMQ_LOADS(1) LOOP_START 1: @@ -1270,9 +1271,10 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba ENTER pxor %mm4, %mm4 - movq mI0000000, %mm5 - movq m000V0V0V, %mm6 - movq m00XXXXXX, %mm7 + LOAD_IMMQ(mI0000000, %mm5) + LOAD_IMMQ(m000V0V0V, %mm6) + LOAD_IMMQ(m00XXXXXX, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: @@ -1322,7 +1324,8 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgb_c ENTER pxor %mm4, %mm4 - movq m000V0V0V, %mm6 + LOAD_IMMQ(m000V0V0V, %mm6) + CLEANUP_IMMQ_LOADS(1) /*\ Load alpha beforehand, as it's always amap(0xff) \*/ movzbl amap_ff, %eax @@ -1365,9 +1368,10 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgba_ ENTER pxor %mm4, %mm4 - movq mI0000000, %mm5 - movq m000V0V0V, %mm6 - movq m00XXXXXX, %mm7 + LOAD_IMMQ(mI0000000, %mm5) + LOAD_IMMQ(m000V0V0V, %mm6) + LOAD_IMMQ(m00XXXXXX, %mm7) + CLEANUP_IMMQ_LOADS(3) LOOP_START 1: @@ -1417,8 +1421,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb_c ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, %mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + CLEANUP_IMMQ_LOADS(2) subl $4, %esi subl $4, %edi @@ -1481,8 +1486,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba_ ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, 
%mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + CLEANUP_IMMQ_LOADS(2) subl $4, %esi subl $4, %edi @@ -1546,8 +1552,9 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_c ENTER pxor %mm4, %mm4 - movq m0XXX0XXX, %mm5 - movq m0VVV0VVV, %mm6 + LOAD_IMMQ(m0XXX0XXX, %mm5) + LOAD_IMMQ(m0VVV0VVV, %mm6) + CLEANUP_IMMQ_LOADS(2) subl $4, %esi subl $4, %edi Index: asm_loadimmq.S =================================================================== RCS file: asm_loadimmq.S diff -N asm_loadimmq.S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ asm_loadimmq.S 6 Aug 2005 23:47:12 -0000 @@ -0,0 +1,77 @@ +/*\ constants and macros for x86 assembly files. \*/ + + +/*\ Constants for asm_blend.S and asm_blend_cmod.S \*/ +/* m0X000000: .byte 0, 0, 0, 0, 0, 0, 255, 0 */ +#define m0X000000_H 0x00FF0000 +#define m0X000000_L 0x00000000 +/* m10000000: .byte 0, 0, 0, 0, 0, 0, 0, 1 */ +#define m10000000_H 0x01000000 +#define m10000000_L 0x00000000 +/* m00XXXXXX: .byte 255, 255, 255, 255, 255, 255, 0, 0 */ +#define m00XXXXXX_H 0x0000FFFF +#define m00XXXXXX_L 0xFFFFFFFF +/* mVX000000: .byte 0, 0, 0, 0, 0, 0, 255, 127 */ +#define mVX000000_H 0x7FFF0000 +#define mVX000000_L 0x00000000 +/* mV0000000: .byte 0, 0, 0, 0, 0, 0, 0, 128 */ +#define mV0000000_H 0x80000000 +#define mV0000000_L 0x00000000 +/* m0XXX0XXX: .byte 255, 255, 255, 0, 255, 255, 255, 0 */ +#define m0XXX0XXX_H 0x00FFFFFF +#define m0XXX0XXX_L 0x00FFFFFF +/* mX000X000: .byte 0, 0, 0, 255, 0, 0, 0, 255 */ +#define mX000X000_H 0xFF000000 +#define mX000X000_L 0xFF000000 +/* m10001000: .byte 0, 0, 0, 1, 0, 0, 0, 1 */ +#define m10001000_H 0x01000000 +#define m10001000_L 0x01000000 +/* m000V0V0V: .byte 127, 0, 127, 0, 127, 0, 0, 0 */ +#define m000V0V0V_H 0x0000007F +#define m000V0V0V_L 0x007F007F +/* mI0000000: .byte 0, 0, 0, 0, 0, 0, 0, 64 */ +#define mI0000000_H 0x40000000 +#define mI0000000_L 0x00000000 +/* m0VVV0VVV: .byte 127, 127, 127, 0, 127, 127, 127, 0 */ +#define m0VVV0VVV_H 0x007F7F7F +#define m0VVV0VVV_L 0x007F7F7F +/* c1: .word 
0x1, 0x1, 0x1, 0x1 */ +#define c1_H 0x00010001 +#define c1_L 0x00010001 + + +/*\ Constants for asm_rgba.S \*/ +/* m_rb: .long 0x00f800f8, 0x00f800f8 */ +#define m_rb_H 0x00f800f8 +#define m_rb_L 0x00f800f8 +/* m_r: .long 0xf800f800, 0xf800f800 */ +#define m_r_H 0xf800f800 +#define m_r_L 0xf800f800 +/* m_g6: .long 0x0000fc00, 0x0000fc00 */ +#define m_g6_H 0x0000fc00 +#define m_g6_L 0x0000fc00 +/* m_g5: .long 0x0000f800, 0x0000f800 */ +#define m_g5_H 0x0000f800 +#define m_g5_L 0x0000f800 +/*\ Multiply constants to fake two shifts at once \*/ +/* mul_rgb565: .long 0x20000004, 0x20000004 */ +#define mul_rgb565_H 0x20000004 +#define mul_rgb565_L 0x20000004 +/* mul_bgr565: .long 0x00042000, 0x00042000 */ +#define mul_bgr565_H 0x00042000 +#define mul_bgr565_L 0x00042000 +/* mul_rgb555: .long 0x20000008, 0x20000008 */ +#define mul_rgb555_H 0x20000008 +#define mul_rgb555_L 0x20000008 +/* mul_bgr555: .long 0x00082000, 0x00082000 */ +#define mul_bgr555_H 0x00082000 +#define mul_bgr555_L 0x00082000 + + +/*\ Load an 8-byte constant to an mmx register \*/ +#define LOAD_IMMQ(mask, reg) \ + pushl $mask##_H ;\ + pushl $mask##_L ;\ + movq (%esp), reg +#define CLEANUP_IMMQ_LOADS(num_loaded) \ + addl $ num_loaded * 8, %esp Index: asm_rgba.S =================================================================== RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_rgba.S,v retrieving revision 1.3 diff -u -p -r1.3 asm_rgba.S --- asm_rgba.S 6 Aug 2005 20:30:27 -0000 1.3 +++ asm_rgba.S 6 Aug 2005 23:47:12 -0000 @@ -55,16 +55,7 @@ .globl PR_(imlib_get_cpuid) .type PT_(imlib_get_cpuid,@function) -/*\ Some useful masks \*/ -m_rb: .long 0x00f800f8, 0x00f800f8 -m_r: .long 0xf800f800, 0xf800f800 -m_g6: .long 0x0000fc00, 0x0000fc00 -m_g5: .long 0x0000f800, 0x0000f800 -/*\ Multiply constants to fake two shifts at once \*/ -mul_rgb565: .long 0x20000004, 0x20000004 -mul_bgr565: .long 0x00042000, 0x00042000 -mul_rgb555: .long 0x20000008, 0x20000008 -mul_bgr555: .long 0x00082000, 0x00082000 
+#include "asm_loadimmq.S" /*\ Common code \*/ /*\ Save registers, load common parameters \*/ @@ -114,18 +105,21 @@ mul_bgr555: .long 0x00082000, 0x00082000 PR_(imlib_mmx_bgr565_fast): - movq mul_bgr565, %mm7 /*\ This constant is the only difference \*/ + LOAD_IMMQ(mul_bgr565, %mm7) /*\ This constant is the only difference \*/ + CLEANUP_IMMQ_LOADS(1) jmp .rgb565_fast_entry SIZE(imlib_mmx_bgr565_fast) PR_(imlib_mmx_rgb565_fast): - movq mul_rgb565, %mm7 + LOAD_IMMQ(mul_rgb565, %mm7) + CLEANUP_IMMQ_LOADS(1) .rgb565_fast_entry: ENTER - movq m_rb, %mm5 - movq m_g6, %mm6 + LOAD_IMMQ(m_rb, %mm5) + LOAD_IMMQ(m_g6, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START @@ -193,18 +187,21 @@ SIZE(imlib_mmx_rgb565_fast) PR_(imlib_mmx_bgr555_fast): - movq mul_bgr555, %mm7 /*\ This constant is the only difference \*/ + LOAD_IMMQ(mul_bgr555, %mm7) /*\ This constant is the only difference \*/ + CLEANUP_IMMQ_LOADS(1) jmp .rgb555_fast_entry SIZE(imlib_mmx_bgr555_fast) PR_(imlib_mmx_rgb555_fast): - movq mul_rgb555, %mm7 + LOAD_IMMQ(mul_rgb555, %mm7) + CLEANUP_IMMQ_LOADS(1) .rgb555_fast_entry: ENTER - movq m_rb, %mm5 - movq m_g5, %mm6 + LOAD_IMMQ(m_rb, %mm5) + LOAD_IMMQ(m_g5, %mm6) + CLEANUP_IMMQ_LOADS(2) LOOP_START