Hi,
regarding the problem I mentioned with the new amd64-optimized
functions in imlib2, I think I found the cause: some of the (SSE2,
128-bit) MOV operations were accessing memory that is not 16-byte
aligned. I changed a couple of MOVDQA instructions to MOVDQU in
amd64_blend.S so that the memory is treated as unaligned; the diff is
below.
Whether this has other side effects (speed?) I don't know, but for me
it works now...
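
For context, here is a minimal stand-alone sketch (not taken from
amd64_blend.S; the symbol c1_example and the label example_load are
made up) of what is going on: MOVDQA faults if its memory operand is
not 16-byte aligned, while MOVDQU accepts any alignment, possibly at
some speed cost on older CPUs. Aligning the constants with .balign 16
would be the alternative to switching to MOVDQU:

	.data
	.balign 16			/* guarantee 16-byte alignment for the constant */
c1_example:
	.quad 0x0001000100010001
	.quad 0x0001000100010001

	.text
	.globl example_load
example_load:
	movdqa c1_example(%rip), %xmm5	/* ok only because of .balign 16 above */
	movdqu c1_example(%rip), %xmm6	/* ok regardless of alignment */
	ret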

Cheers,
Tiago Gehring


 
diff -u -r e17/libs/imlib2/src/lib/amd64_blend.S ../e17_ori/e17/libs/imlib2/src/lib/amd64_blend.S
--- e17/libs/imlib2/src/lib/amd64_blend.S	2005-08-07 07:07:23.000000000 +0000
+++ ../e17_ori/e17/libs/imlib2/src/lib/amd64_blend.S	2005-08-22 07:10:00.000000000 +0000
@@ -168,8 +168,8 @@
 	ENTER
 
 	pxor %xmm4, %xmm4
-	movdqa c1(%rip), %xmm5
-	movdqa m00XXXXXX(%rip), %xmm6
+	movdqu c1(%rip), %xmm5
+	movdqu m00XXXXXX(%rip), %xmm6
 
 	/* Move right to left across each line, */ 
 	/* processing in two pixel chunks */ 
@@ -565,9 +565,9 @@
 	ENTER
 
 	pxor %xmm4, %xmm4
-	movdqa c1(%rip), %xmm5
+	movdqu c1(%rip), %xmm5
 	xorq %rax, %rax
-	movdqa mX000X000X000X000(%rip), %xmm6
+	movdqu mX000X000X000X000(%rip), %xmm6
 	movq pow_lut@GOTPCREL(%rip), %r13
 
 	/* Move right to left across each line, */ 
@@ -994,8 +994,8 @@
 PR_(imlib_amd64_copy_rgba_to_rgb):
 	ENTER
 
-	movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm5
-	movdqa mX000X000X000X000(%rip), %xmm6
+	movdqu m0XXX0XXX0XXX0XXX(%rip), %xmm5
+	movdqu mX000X000X000X000(%rip), %xmm6
 
 	leaq (%rsi, %r8, 4), %rsi
 	leaq (%rdi, %r8, 4), %rdi
