I made some changes to CompareByte in rtl/x86_64/x86_64.inc to reduce the code size and make it run faster (see attached patch). I succeeded in reducing the code size (47 bytes vs. 62 bytes) and also in improving the speed (according to a micro benchmark [1] run on an Ivy Bridge desktop).

To achieve this I used movzbl twice. But then I came across the comment in FillChar (also in rtl/x86_64/x86_64.inc) about movzbl breaking targets that use an external GAS (Mantis #19188). As this Mantis issue dates back to 2011, my question is: is this still valid? And what would be the preferred way to overcome this issue?
{$ifdef oldbinutils}
   .byte 0x0F,0xb6,0x01
{$else}
   movzbl (%rcx),%eax
{$endif}


Markus

[1] The benchmark compares a 10 MB memory block with itself 10,000 times.
Index: trunk/rtl/x86_64/x86_64.inc
===================================================================
--- trunk/rtl/x86_64/x86_64.inc	(Revision 37365)
+++ trunk/rtl/x86_64/x86_64.inc	(Arbeitskopie)
@@ -645,8 +645,8 @@
 
     .balign 8
 .LCmpbyteLoop:
-    movb    (%rcx),%r9b
-    cmpb    (%rdx),%r9b
+    movzbl  (%rcx),%eax
+    cmpb    (%rdx),%al
     leaq    1(%rcx),%rcx
     leaq    1(%rdx),%rdx
     jne     .LCmpbyteExitFast
@@ -653,14 +653,12 @@
     decq    %r8
     jne     .LCmpbyteLoop
 .LCmpbyteExitFast:
-     movzbq  -1(%rdx),%r8     { Compare last position }
-     movzbq  %r9b,%rax
-     subq    %r8,%rax
-     ret
+     movzbl  -1(%rdx),%ecx    { Compare last position }
+     subq    %rcx,%rax
+     retq
 
 .LCmpbyteZero:
-     movq    $0,%rax
-     ret
+     xorl    %eax,%eax
 end;
 {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
 
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel

Reply via email to