Hi Tomas,

Do you think this should suffice? Originally it ran for 1,000,000 repetitions but I fear that will take way too long on a 486, so I reduced it to 10,000.

Kit

On 03/10/2023 06:30, Tomas Hajny via fpc-devel wrote:
On October 3, 2023 03:32:34 +0200, "J. Gareth Moreton via fpc-devel" 
<fpc-devel@lists.freepascal.org> wrote:


Hi Kit,

This is mainly to Florian, but also to anyone else who can answer the question 
- at which point did a complex LEA instruction (using all three input operands 
and some other specific circumstances) get slow?  Preliminary research suggests 
the 486 was when it gained extra latency, and then Sandy Bridge when it got 
particularly bad.  Ice Lake seems to be the architecture where faster LEA 
instructions are reintroduced, but I'm not sure about AMD processors.
I cannot answer your question, but if you prepare a test program, I can run it 
on an Intel 486 DX2 100 MHz and AMD Athlon 1 GHz machines if it helps you in 
any way (at least I hope the 486 DX2 machine should be still able to start ;-) 
).

Tomas

_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
program leatest;
{$MODE OBJFPC}
{$ASMMODE Intel}

uses
  SysUtils;
  
type
  { Signature shared by the checksum routines that are handed to Benchmark:
    three LongWord inputs in, one LongWord checksum out. }
  TBenchmarkProc = function(const Input, X, Y: LongWord): LongWord;
 

{$push}
{$Q-}{$R-} { the checksum relies on 32-bit wrap-around; never trap on it }
{ Pure Pascal reference ("control case") for the assembly checksums.

  Starting from Input, performs Y iterations of
      Result := (Result + X + $87654321) xor Counter
  with Counter running from Y down to 1, which is the same recurrence
  that Checksum_ADD and Checksum_LEA implement in assembly.

  Input - initial checksum value.
  X     - value added on every iteration.
  Y     - number of iterations; Y = 0 returns Input unchanged.

  Returns the final checksum (arithmetic is modulo 2^32). }
function Checksum_PAS(const Input, X, Y: LongWord): LongWord;
var
  Counter: LongWord;
begin
  Result := Input;
  Counter := Y;
  while (Counter > 0) do
    begin
      { Accumulate into the running checksum, then mix in the counter,
        mirroring the ADD/LEA + XOR sequence of the assembly versions. }
      Result := Result + X + $87654321;
      Result := Result xor Counter;
      Dec(Counter);
    end;
end;
{$pop}

{ Checksum loop built from two ADD instructions per iteration.

  Starting from Input, every iteration computes
      Result := (Result + $87654321 + X) xor Counter
  and then decrements Counter, which starts at Y; the loop ends when
  Counter reaches zero.

  Y must be non-zero: the loop is bottom-tested, so it always executes at
  least once and Y = 0 would wrap the counter around.

  Argument registers used by each variant:
    i386 (register convention) : Input = EAX, X = EDX, Y = ECX
    x86_64, Win64              : Input = ECX, X = EDX, Y = R8D
    x86_64, System V (others)  : Input = EDI, X = ESI, Y = EDX }
function Checksum_ADD(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop1:
{$ifdef CPUX86_64}
{$ifdef WIN64}
  ADD ECX, $87654321
  ADD ECX, EDX
  XOR ECX, R8D
  DEC R8D
  JNZ @Loop1
  MOV EAX, ECX
{$else WIN64}
  { System V AMD64 ABI: the first three integer arguments are in
    EDI, ESI and EDX rather than ECX, EDX and R8D. }
  ADD EDI, $87654321
  ADD EDI, ESI
  XOR EDI, EDX
  DEC EDX
  JNZ @Loop1
  MOV EAX, EDI
{$endif WIN64}
{$else CPUX86_64}
  ADD EAX, $87654321
  ADD EAX, EDX
  XOR EAX, ECX
  DEC ECX
  JNZ @Loop1
{$endif CPUX86_64}
end;

{ Checksum loop built from a single three-operand LEA per iteration
  (base + index + displacement), the instruction form being benchmarked.

  Starting from Input, every iteration computes
      Result := (Result + X + $87654321) xor Counter
  and then decrements Counter, which starts at Y; the loop ends when
  Counter reaches zero.

  Y must be non-zero: the loop is bottom-tested, so it always executes at
  least once and Y = 0 would wrap the counter around.

  Argument registers used by each variant:
    i386 (register convention) : Input = EAX, X = EDX, Y = ECX
    x86_64, Win64              : Input = ECX, X = EDX, Y = R8D
    x86_64, System V (others)  : Input = EDI, X = ESI, Y = EDX }
function Checksum_LEA(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop2:
{$ifdef CPUX86_64}
{$ifdef WIN64}
  LEA ECX, [ECX + EDX + $87654321]
  XOR ECX, R8D
  DEC R8D
  JNZ @Loop2
  MOV EAX, ECX
{$else WIN64}
  { System V AMD64 ABI: the first three integer arguments are in
    EDI, ESI and EDX rather than ECX, EDX and R8D. }
  LEA EDI, [EDI + ESI + $87654321]
  XOR EDI, EDX
  DEC EDX
  JNZ @Loop2
  MOV EAX, EDI
{$endif WIN64}
{$else CPUX86_64}
  LEA EAX, [EAX + EDX + $87654321]
  XOR EAX, ECX
  DEC ECX
  JNZ @Loop2
{$endif CPUX86_64}
end;

{ Times a checksum routine and prints the average cost of one loop
  iteration in nanoseconds.

  name - label printed in front of the timing figure.
  proc - routine under test.
  Z    - seed checksum, passed as Input to the first call.
  X    - addend forwarded unchanged to every call.

  proc is called outer_reps times with Y = internal_reps.  Each call
  receives the previous call's result as its Input, so the returned value
  is a checksum over the whole run that the caller can compare between
  implementations.

  Returns the checksum produced by the last call. }
function Benchmark(const name: string; proc: TBenchmarkProc; Z, X: LongWord):
LongWord;
const
  internal_reps = 1000;   { loop iterations performed inside each call }
  outer_reps    = 10000;  { number of calls made while the clock runs }
var
  start: TDateTime;
  time: double;
  reps: cardinal;
begin
  Result := Z;
  start := Now;
  { Feed each result into the next call; discarding it would leave Result
    equal to Z and make the caller's comparison meaningless. }
  for reps := 1 to outer_reps do
    Result := proc(Result, X, internal_reps);
  { Read the clock once, after the loop, so that the cost of Now itself is
    not added to every measured repetition. }
  time := (Now - start) * SecsPerDay;
  time := time / outer_reps / internal_reps * 1e9;
  { One decimal place is shown only for figures below 10 ns. }
  writeln(name, ': ', time:0:ord(time < 10), ' ns/call');
end;

var
  Control, ViaLEA, ViaADD: LongWord;
  ExitStatus: Integer;
begin
  { Run the three variants in a fixed order: Pascal reference first, then
    the LEA and ADD assembly versions, all with the same seed and addend. }
  Control := Benchmark('   Pascal control case', @Checksum_PAS, 5000000, 1000);
  ViaLEA  := Benchmark(' Using LEA instruction', @Checksum_LEA, 5000000, 1000);
  ViaADD  := Benchmark('Using ADD instructions', @Checksum_ADD, 5000000, 1000);

  { Bit 0 of the exit status flags an LEA mismatch, bit 1 an ADD mismatch. }
  ExitStatus := 0;

  if ViaLEA <> Control then
  begin
    WriteLn('ERROR: Checksum_LEA doesn''t match control case');
    ExitStatus := ExitStatus or 1;
  end;

  if ViaADD <> Control then
  begin
    WriteLn('ERROR: Checksum_ADD doesn''t match control case');
    ExitStatus := ExitStatus or 2;
  end;

  { Only terminate with an explicit code when something went wrong. }
  if ExitStatus <> 0 then
    Halt(ExitStatus);
end.
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel

Reply via email to