On Friday 05 August 2005 05:59 pm, Mike Frysinger wrote:
> find attached a PoC patch ive been toying with lately for asm_blend.S which
> fixes each memory reference so that it is PIC aware.  i can understand if
> some people couldnt care less whether their library has textrels in it and
> would rather accept a bit of slowdown at load time than slowdown at runtime

i reviewed the stuff with some other Gentoo devs (Ned Ludd and Kevin F. Quinn) 
and the PaX team, and we've come up with a much nicer patch which really 
shouldnt add significant overhead to the mmx routines :)

the trick here is to go from doing:
.data
mVX000000: .byte   0,   0,   0,   0,   0,   0, 255, 127
m00XXXXXX: .byte 255, 255, 255, 255, 255, 255,   0,   0
.text
movq mVX000000, %mm5
movq m00XXXXXX, %mm6

to this:
.text
pushl $0x7FFF0000
pushl $0x00000000
movq (%esp), %mm5
pushl $0x0000FFFF
pushl $0xFFFFFFFF
movq (%esp), %mm6
addl $16, %esp

of course, these pretty little details are hidden behind macros, so in the source 
files you simply write:
LOAD_IMMQ(mVX000000, %mm5)
LOAD_IMMQ(m00XXXXXX, %mm6)
CLEANUP_IMMQ_LOADS(2)
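
for reference, the two macros (straight from the attached asm_loadimmq.S) are simply:

/*\ Load an 8-byte constant to an mmx register \*/
#define LOAD_IMMQ(mask, reg)		\
	pushl	$mask##_H		;\
	pushl	$mask##_L		;\
	movq	(%esp), reg
#define CLEANUP_IMMQ_LOADS(num_loaded)	\
	addl	$ num_loaded * 8, %esp

each old mask also gets split into _H/_L dword halves via #defines in that file, so 
the existing mask names keep working unchanged.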
-mike
Index: asm_blend.S
===================================================================
RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_blend.S,v
retrieving revision 1.3
diff -u -p -r1.3 asm_blend.S
--- asm_blend.S	6 Aug 2005 20:30:27 -0000	1.3
+++ asm_blend.S	6 Aug 2005 23:47:11 -0000
@@ -85,21 +85,10 @@
 	.type PT_(imlib_mmx_reshade_copy_rgba_to_rgba,@function)
 .globl PR_(imlib_mmx_reshade_copy_rgb_to_rgba)
 	.type PT_(imlib_mmx_reshade_copy_rgb_to_rgba,@function)
-	
-/*\ Some useful masks \*/
-m0X000000: .byte   0,   0,   0,   0,   0,   0, 255,   0
-m10000000: .byte   0,   0,   0,   0,   0,   0,   0,   1
-m00XXXXXX: .byte 255, 255, 255, 255, 255, 255,   0,   0
-mVX000000: .byte   0,   0,   0,   0,   0,   0, 255, 127
-mV0000000: .byte   0,   0,   0,   0,   0,   0,   0, 128
-m0XXX0XXX: .byte 255, 255, 255,   0, 255, 255, 255,   0
-mX000X000: .byte   0,   0,   0, 255,   0,   0,   0, 255
-m10001000: .byte   0,   0,   0,   1,   0,   0,   0,   1
-m000V0V0V: .byte 127,   0, 127,   0, 127,   0,   0,   0
-mI0000000: .byte   0,   0,   0,   0,   0,   0,   0,  64
-m0VVV0VVV: .byte 127, 127, 127,   0, 127, 127, 127,   0
-c1: .word 0x1, 0x1, 0x1, 0x1
 
+#include "asm_loadimmq.S"
+
+	
 /*\ MMX register use:
 |*| %mm1 = Source value
 |*| %mm2 = Destination value
@@ -162,7 +151,8 @@ PR_(imlib_mmx_blend_rgba_to_rgb):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq c1, %mm5
+	LOAD_IMMQ(c1, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	LOOP_START
 1:
@@ -218,9 +208,10 @@ PR_(imlib_mmx_blend_rgba_to_rgba):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0X000000, %mm5
-	movq m00XXXXXX, %mm6
-	movq c1, %mm7
+	LOAD_IMMQ(m0X000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	LOAD_IMMQ(c1, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -272,8 +263,9 @@ SIZE(imlib_mmx_blend_rgba_to_rgba)
 PR_(imlib_mmx_copy_rgba_to_rgb):
 	ENTER
 
-	movq m0XXX0XXX, %mm5
-	movq mX000X000, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(mX000X000, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	/*\ Two at a time: last item is at %ecx = 0 \*/
 	subl $4, %esi
@@ -342,7 +334,8 @@ SIZE(imlib_mmx_copy_rgba_to_rgba)
 PR_(imlib_mmx_copy_rgb_to_rgba):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -415,8 +408,9 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mVX000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mVX000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -463,7 +457,8 @@ SIZE(imlib_mmx_add_blend_rgba_to_rgba)
 PR_(imlib_mmx_add_copy_rgba_to_rgb):
 	ENTER
 
-	movq m0XXX0XXX, %mm5
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -533,7 +528,8 @@ SIZE(imlib_mmx_add_copy_rgba_to_rgba)
 PR_(imlib_mmx_add_copy_rgb_to_rgba):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -613,8 +609,9 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mV0000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mV0000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -661,7 +658,8 @@ SIZE(imlib_mmx_subtract_blend_rgba_to_rg
 PR_(imlib_mmx_subtract_copy_rgba_to_rgb):
 	ENTER
 
-	movq m0XXX0XXX, %mm5
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -699,7 +697,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb
 PR_(imlib_mmx_subtract_copy_rgba_to_rgba):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -741,7 +740,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb
 PR_(imlib_mmx_subtract_copy_rgb_to_rgba):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -780,7 +780,8 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb)
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m000V0V0V, %mm6
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	CLEANUP_IMMQ_LOADS(1)
 
 	LOOP_START
 1:
@@ -823,9 +824,10 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mI0000000, %mm5
-	movq m000V0V0V, %mm6
-	movq m00XXXXXX, %mm7
+	LOAD_IMMQ(mI0000000, %mm5)
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	LOAD_IMMQ(m00XXXXXX, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -875,8 +877,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -939,8 +942,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba)
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1004,9 +1008,10 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
-	movq mX000X000, %mm7
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	LOAD_IMMQ(mX000X000, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	subl $4, %esi
 	subl $4, %edi
Index: asm_blend_cmod.S
===================================================================
RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_blend_cmod.S,v
retrieving revision 1.3
diff -u -p -r1.3 asm_blend_cmod.S
--- asm_blend_cmod.S	6 Aug 2005 20:30:27 -0000	1.3
+++ asm_blend_cmod.S	6 Aug 2005 23:47:12 -0000
@@ -112,21 +112,9 @@
 	.type PT_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod,@function)
 .globl PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
 	.type PT_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod,@function)
-	
-/*\ Some useful masks \*/
-m0X000000: .byte   0,   0,   0,   0,   0,   0, 255,   0
-m10000000: .byte   0,   0,   0,   0,   0,   0,   0,   1
-m00XXXXXX: .byte 255, 255, 255, 255, 255, 255,   0,   0
-mVX000000: .byte   0,   0,   0,   0,   0,   0, 255, 127
-mV0000000: .byte   0,   0,   0,   0,   0,   0,   0, 128
-m0XXX0XXX: .byte 255, 255, 255,   0, 255, 255, 255,   0
-mX000X000: .byte   0,   0,   0, 255,   0,   0,   0, 255
-m10001000: .byte   0,   0,   0,   1,   0,   0,   0,   1
-m000V0V0V: .byte 127,   0, 127,   0, 127,   0,   0,   0
-mI0000000: .byte   0,   0,   0,   0,   0,   0,   0,  64
-m0VVV0VVV: .byte 127, 127, 127,   0, 127, 127, 127,   0
-c1: .word 0x1, 0x1, 0x1, 0x1
 
+#include "asm_loadimmq.S"
+	
 /*\ MMX register use:
 |*| %mm1 = Source value
 |*| %mm2 = Destination value
@@ -364,7 +352,8 @@ PR_(imlib_mmx_blend_rgba_to_rgb_cmod):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq c1, %mm5
+	LOAD_IMMQ(c1, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	LOOP_START
 1:
@@ -420,9 +409,10 @@ PR_(imlib_mmx_blend_rgba_to_rgba_cmod):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0X000000, %mm5
-	movq m00XXXXXX, %mm6
-	movq c1, %mm7
+	LOAD_IMMQ(m0X000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	LOAD_IMMQ(c1, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -475,7 +465,8 @@ PR_(imlib_mmx_blend_rgb_to_rgb_cmod):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq c1, %mm5
+	LOAD_IMMQ(c1, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	/*\ Load alpha beforehand, as it's always amap(0xff) \*/
 	movzbl amap_ff, %eax
@@ -519,9 +510,10 @@ PR_(imlib_mmx_blend_rgb_to_rgba_cmod):
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0X000000, %mm5
-	movq m00XXXXXX, %mm6
-	movq c1, %mm7
+	LOAD_IMMQ(m0X000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	LOAD_IMMQ(c1, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -688,8 +680,9 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba_cmo
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mVX000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mVX000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -778,8 +771,9 @@ PR_(imlib_mmx_add_blend_rgb_to_rgba_cmod
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mVX000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mVX000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -826,7 +820,8 @@ SIZE(imlib_mmx_add_blend_rgb_to_rgba_cmo
 PR_(imlib_mmx_add_copy_rgba_to_rgb_cmod):
 	ENTER
 
-	movq m0XXX0XXX, %mm5
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -970,8 +965,9 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mV0000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mV0000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -1060,8 +1056,9 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgba
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mV0000000, %mm5
-	movq m00XXXXXX, %mm6
+	LOAD_IMMQ(mV0000000, %mm5)
+	LOAD_IMMQ(m00XXXXXX, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 1:
@@ -1108,7 +1105,8 @@ SIZE(imlib_mmx_subtract_blend_rgb_to_rgb
 PR_(imlib_mmx_subtract_copy_rgba_to_rgb_cmod):
 	ENTER
 
-	movq m0XXX0XXX, %mm5
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1146,7 +1144,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb
 PR_(imlib_mmx_subtract_copy_rgba_to_rgba_cmod):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1188,7 +1187,8 @@ SIZE(imlib_mmx_subtract_copy_rgba_to_rgb
 PR_(imlib_mmx_subtract_copy_rgb_to_rgba_cmod):
 	ENTER
 
-	movq mX000X000, %mm5
+	LOAD_IMMQ(mX000X000, %mm5)
+	CLEANUP_IMMQ_LOADS(1)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1227,7 +1227,8 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb_
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m000V0V0V, %mm6
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	CLEANUP_IMMQ_LOADS(1)
 
 	LOOP_START
 1:
@@ -1270,9 +1271,10 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mI0000000, %mm5
-	movq m000V0V0V, %mm6
-	movq m00XXXXXX, %mm7
+	LOAD_IMMQ(mI0000000, %mm5)
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	LOAD_IMMQ(m00XXXXXX, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -1322,7 +1324,8 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgb_c
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m000V0V0V, %mm6
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	CLEANUP_IMMQ_LOADS(1)
 
 	/*\ Load alpha beforehand, as it's always amap(0xff) \*/
 	movzbl amap_ff, %eax
@@ -1365,9 +1368,10 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgba_
 	ENTER
 
 	pxor %mm4, %mm4
-	movq mI0000000, %mm5
-	movq m000V0V0V, %mm6
-	movq m00XXXXXX, %mm7
+	LOAD_IMMQ(mI0000000, %mm5)
+	LOAD_IMMQ(m000V0V0V, %mm6)
+	LOAD_IMMQ(m00XXXXXX, %mm7)
+	CLEANUP_IMMQ_LOADS(3)
 
 	LOOP_START
 1:
@@ -1417,8 +1421,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb_c
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1481,8 +1486,9 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba_
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	subl $4, %esi
 	subl $4, %edi
@@ -1546,8 +1552,9 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_c
 	ENTER
 
 	pxor %mm4, %mm4
-	movq m0XXX0XXX, %mm5
-	movq m0VVV0VVV, %mm6
+	LOAD_IMMQ(m0XXX0XXX, %mm5)
+	LOAD_IMMQ(m0VVV0VVV, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	subl $4, %esi
 	subl $4, %edi
Index: asm_loadimmq.S
===================================================================
RCS file: asm_loadimmq.S
diff -N asm_loadimmq.S
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ asm_loadimmq.S	6 Aug 2005 23:47:12 -0000
@@ -0,0 +1,77 @@
+/*\ constants and macros for x86 assembly files. \*/
+
+
+/*\ Constants for asm_blend.S and asm_blend_cmod.S \*/
+/* m0X000000: .byte   0,   0,   0,   0,   0,   0, 255,   0 */
+#define m0X000000_H 0x00FF0000
+#define m0X000000_L 0x00000000
+/* m10000000: .byte   0,   0,   0,   0,   0,   0,   0,   1 */
+#define m10000000_H 0x01000000
+#define m10000000_L 0x00000000
+/* m00XXXXXX: .byte 255, 255, 255, 255, 255, 255,   0,   0 */
+#define m00XXXXXX_H 0x0000FFFF
+#define m00XXXXXX_L 0xFFFFFFFF
+/* mVX000000: .byte   0,   0,   0,   0,   0,   0, 255, 127 */
+#define mVX000000_H 0x7FFF0000
+#define mVX000000_L 0x00000000
+/* mV0000000: .byte   0,   0,   0,   0,   0,   0,   0, 128 */
+#define mV0000000_H 0x80000000
+#define mV0000000_L 0x00000000
+/* m0XXX0XXX: .byte 255, 255, 255,   0, 255, 255, 255,   0 */
+#define m0XXX0XXX_H 0x00FFFFFF
+#define m0XXX0XXX_L 0x00FFFFFF
+/* mX000X000: .byte   0,   0,   0, 255,   0,   0,   0, 255 */
+#define mX000X000_H 0xFF000000
+#define mX000X000_L 0xFF000000
+/* m10001000: .byte   0,   0,   0,   1,   0,   0,   0,   1 */
+#define m10001000_H 0x01000000
+#define m10001000_L 0x01000000
+/* m000V0V0V: .byte 127,   0, 127,   0, 127,   0,   0,   0 */
+#define m000V0V0V_H 0x0000007F
+#define m000V0V0V_L 0x007F007F
+/* mI0000000: .byte   0,   0,   0,   0,   0,   0,   0,  64 */
+#define mI0000000_H 0x40000000
+#define mI0000000_L 0x00000000
+/* m0VVV0VVV: .byte 127, 127, 127,   0, 127, 127, 127,   0 */
+#define m0VVV0VVV_H 0x007F7F7F
+#define m0VVV0VVV_L 0x007F7F7F
+/* c1: .word 0x1, 0x1, 0x1, 0x1 */
+#define c1_H 0x00010001
+#define c1_L 0x00010001
+
+
+/*\ Constants for asm_rgba.S \*/
+/* m_rb: .long 0x00f800f8, 0x00f800f8 */
+#define m_rb_H 0x00f800f8
+#define m_rb_L 0x00f800f8
+/* m_r:  .long 0xf800f800, 0xf800f800 */
+#define m_r_H 0xf800f800
+#define m_r_L 0xf800f800
+/* m_g6: .long 0x0000fc00, 0x0000fc00 */
+#define m_g6_H 0x0000fc00
+#define m_g6_L 0x0000fc00
+/* m_g5: .long 0x0000f800, 0x0000f800 */
+#define m_g5_H 0x0000f800
+#define m_g5_L 0x0000f800
+/*\ Multiply constants to fake two shifts at once \*/
+/* mul_rgb565: .long 0x20000004, 0x20000004 */
+#define mul_rgb565_H 0x20000004
+#define mul_rgb565_L 0x20000004
+/* mul_bgr565: .long 0x00042000, 0x00042000 */
+#define mul_bgr565_H 0x00042000
+#define mul_bgr565_L 0x00042000
+/* mul_rgb555: .long 0x20000008, 0x20000008 */
+#define mul_rgb555_H 0x20000008
+#define mul_rgb555_L 0x20000008
+/* mul_bgr555: .long 0x00082000, 0x00082000 */
+#define mul_bgr555_H 0x00082000
+#define mul_bgr555_L 0x00082000
+
+
+/*\ Load an 8-byte constant to an mmx register \*/
+#define LOAD_IMMQ(mask, reg)		\
+	pushl	$mask##_H		;\
+	pushl	$mask##_L		;\
+	movq	(%esp), reg
+#define CLEANUP_IMMQ_LOADS(num_loaded)	\
+	addl	$ num_loaded * 8, %esp
Index: asm_rgba.S
===================================================================
RCS file: /cvsroot/enlightenment/e17/libs/imlib2/src/lib/asm_rgba.S,v
retrieving revision 1.3
diff -u -p -r1.3 asm_rgba.S
--- asm_rgba.S	6 Aug 2005 20:30:27 -0000	1.3
+++ asm_rgba.S	6 Aug 2005 23:47:12 -0000
@@ -55,16 +55,7 @@
 .globl PR_(imlib_get_cpuid)
 	.type PT_(imlib_get_cpuid,@function)
 
-/*\ Some useful masks \*/
-m_rb: .long 0x00f800f8, 0x00f800f8
-m_r:  .long 0xf800f800, 0xf800f800
-m_g6: .long 0x0000fc00, 0x0000fc00
-m_g5: .long 0x0000f800, 0x0000f800
-/*\ Multiply constants to fake two shifts at once \*/
-mul_rgb565: .long 0x20000004, 0x20000004
-mul_bgr565: .long 0x00042000, 0x00042000
-mul_rgb555: .long 0x20000008, 0x20000008
-mul_bgr555: .long 0x00082000, 0x00082000
+#include "asm_loadimmq.S"
 
 /*\ Common code \*/
 /*\ Save registers, load common parameters \*/
@@ -114,18 +105,21 @@ mul_bgr555: .long 0x00082000, 0x00082000
 
 
 PR_(imlib_mmx_bgr565_fast):
-	movq mul_bgr565, %mm7	/*\ This constant is the only difference \*/
+	LOAD_IMMQ(mul_bgr565, %mm7)	/*\ This constant is the only difference \*/
+	CLEANUP_IMMQ_LOADS(1)
 	jmp .rgb565_fast_entry
 
 SIZE(imlib_mmx_bgr565_fast)
 
 PR_(imlib_mmx_rgb565_fast):
-	movq mul_rgb565, %mm7
+	LOAD_IMMQ(mul_rgb565, %mm7)
+	CLEANUP_IMMQ_LOADS(1)
 .rgb565_fast_entry:
 	ENTER
 
-	movq m_rb, %mm5
-	movq m_g6, %mm6
+	LOAD_IMMQ(m_rb, %mm5)
+	LOAD_IMMQ(m_g6, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 
@@ -193,18 +187,21 @@ SIZE(imlib_mmx_rgb565_fast)
 
 
 PR_(imlib_mmx_bgr555_fast):
-	movq mul_bgr555, %mm7	/*\ This constant is the only difference \*/
+	LOAD_IMMQ(mul_bgr555, %mm7)	/*\ This constant is the only difference \*/
+	CLEANUP_IMMQ_LOADS(1)
 	jmp .rgb555_fast_entry
 
 SIZE(imlib_mmx_bgr555_fast)
 
 PR_(imlib_mmx_rgb555_fast):
-	movq mul_rgb555, %mm7
+	LOAD_IMMQ(mul_rgb555, %mm7)
+	CLEANUP_IMMQ_LOADS(1)
 .rgb555_fast_entry:
 	ENTER
 
-	movq m_rb, %mm5
-	movq m_g5, %mm6
+	LOAD_IMMQ(m_rb, %mm5)
+	LOAD_IMMQ(m_g5, %mm6)
+	CLEANUP_IMMQ_LOADS(2)
 
 	LOOP_START
 
