Hi Tomas,
Do you think this should suffice? Originally it ran for 1,000,000
repetitions but I fear that will take way too long on a 486, so I
reduced it to 10,000.
Kit
On 03/10/2023 06:30, Tomas Hajny via fpc-devel wrote:
On October 3, 2023 03:32:34 +0200, "J. Gareth Moreton via fpc-devel"
<fpc-devel@lists.freepascal.org> wrote:
Hi Kit,
This is mainly to Florian, but also to anyone else who can answer the question
- at which point did a complex LEA instruction (using all three input operands
and some other specific circumstances) get slow? Preliminary research suggests
the 486 was when it gained extra latency, and then Sandy Bridge when it got
particularly bad. Ice Lake seems to be the architecture where faster LEA
instructions are reintroduced, but I'm not sure about AMD processors.
I cannot answer your question, but if you prepare a test program, I can run it
on an Intel 486 DX2 100 MHz and AMD Athlon 1 GHz machines if it helps you in
any way (at least I hope the 486 DX2 machine should be still able to start ;-)
).
Tomas
_______________________________________________
fpc-devel maillist - fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
program leatest;
{$MODE OBJFPC}
{$ASMMODE Intel}
uses
SysUtils;
type
{ Common signature of the checksum kernels handed to Benchmark.
  Each kernel takes a starting value (Input), a constant addend (X) and an
  iteration count (Y), and returns the resulting 32-bit checksum. }
TBenchmarkProc = function(const Input, X, Y: LongWord): LongWord;
{ Pure-Pascal reference implementation of the checksum kernel.

  Starting from Input, performs Y iterations of

      Result := (Result + X + $87654321) xor Counter

  with Counter running from Y down to 1. This is the same recurrence the
  hand-written ADD and LEA kernels compute, so the three results can be
  compared directly. All arithmetic is intended to wrap modulo 2^32.

  Input - initial checksum value
  X     - constant added on every iteration
  Y     - number of iterations; 0 returns Input unchanged
  Returns the final 32-bit checksum. }
{$push}
{$Q-}{$R-} { wrap-around is intentional; never trap on it }
function Checksum_PAS(const Input, X, Y: LongWord): LongWord;
var
  Counter: LongWord;
begin
  Result := Input;
  Counter := Y;
  while (Counter > 0) do
    begin
      { Accumulate into Result (do not overwrite it) ... }
      Result := Result + X + $87654321;
      { ... then mix in the loop counter, exactly as the asm kernels do. }
      Result := Result xor Counter;
      Dec(Counter);
    end;
end;
{$pop}
{ Checksum kernel built from two ADD instructions (the alternative to a
  three-operand LEA).

  Per iteration: Acc := (Acc + $87654321 + X) xor Counter, with Counter
  counting down from Y to 1 and Acc starting at Input.

  The routine has no stack frame and addresses its arguments directly in the
  registers of the target's default calling convention:

    i386 (register convention) : Input = EAX, X = EDX, Y = ECX
    x86_64, Win64 ABI          : Input = ECX, X = EDX, Y = R8D
    x86_64, System V ABI       : Input = EDI, X = ESI, Y = EDX

  The loop is bottom-tested (DEC / JNZ), so Y must be non-zero; Y = 0 would
  wrap the counter and run 2^32 iterations. The result is returned in EAX. }
function Checksum_ADD(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop1:
{$ifdef CPUX86_64}
  {$ifdef WIN64}
  ADD   ECX, $87654321
  ADD   ECX, EDX
  XOR   ECX, R8D
  DEC   R8D
  JNZ   @Loop1
  MOV   EAX, ECX
  {$else WIN64}
  { System V AMD64 ABI: first three integer arguments are in EDI, ESI, EDX }
  ADD   EDI, $87654321
  ADD   EDI, ESI
  XOR   EDI, EDX
  DEC   EDX
  JNZ   @Loop1
  MOV   EAX, EDI
  {$endif WIN64}
{$else CPUX86_64}
  { i386: Input is already in EAX, which is also the result register }
  ADD   EAX, $87654321
  ADD   EAX, EDX
  XOR   EAX, ECX
  DEC   ECX
  JNZ   @Loop1
{$endif CPUX86_64}
end;
{ Checksum kernel built from a single three-component LEA
  (base + index + displacement), the instruction form under test.

  Per iteration: Acc := (Acc + X + $87654321) xor Counter, with Counter
  counting down from Y to 1 and Acc starting at Input.

  The routine has no stack frame and addresses its arguments directly in the
  registers of the target's default calling convention:

    i386 (register convention) : Input = EAX, X = EDX, Y = ECX
    x86_64, Win64 ABI          : Input = ECX, X = EDX, Y = R8D
    x86_64, System V ABI       : Input = EDI, X = ESI, Y = EDX

  The loop is bottom-tested (DEC / JNZ), so Y must be non-zero; Y = 0 would
  wrap the counter and run 2^32 iterations. The result is returned in EAX. }
function Checksum_LEA(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop2:
{$ifdef CPUX86_64}
  {$ifdef WIN64}
  LEA   ECX, [ECX + EDX + $87654321]
  XOR   ECX, R8D
  DEC   R8D
  JNZ   @Loop2
  MOV   EAX, ECX
  {$else WIN64}
  { System V AMD64 ABI: first three integer arguments are in EDI, ESI, EDX }
  LEA   EDI, [EDI + ESI + $87654321]
  XOR   EDI, EDX
  DEC   EDX
  JNZ   @Loop2
  MOV   EAX, EDI
  {$endif WIN64}
{$else CPUX86_64}
  { i386: Input is already in EAX, which is also the result register }
  LEA   EAX, [EAX + EDX + $87654321]
  XOR   EAX, ECX
  DEC   ECX
  JNZ   @Loop2
{$endif CPUX86_64}
end;
{ Times one checksum kernel and returns the checksum it produced.

  The kernel is invoked outer_reps times, each call running internal_reps
  inner iterations. The checksum returned by each call is fed in as the Input
  of the next, so the value returned here depends on every call and can be
  compared between kernels to verify they compute the same thing.

  name - label printed in front of the timing
  proc - kernel to measure
  Z    - initial checksum value
  X    - constant addend forwarded to the kernel
  Returns the final chained checksum.

  Prints the average time per inner iteration in nanoseconds. }
function Benchmark(const name: string; proc: TBenchmarkProc; Z, X: LongWord): LongWord;
const
  internal_reps = 1000;
  outer_reps = 10000;
var
  start: TDateTime;
  time: double;
  reps: cardinal;
begin
  Result := Z;
  start := Now;
  for reps := 1 to outer_reps do
    { Keep the kernel's result: it is both the next Input and, after the
      last call, the checksum handed back to the caller. }
    Result := proc(Result, X, internal_reps);
  { Read the clock once, after the loop, so clock queries are not part of
    the measured work. Now is in days; convert to seconds. }
  time := (Now - start) * SecsPerDay;
  { Average over all inner iterations, in nanoseconds. }
  time := time / outer_reps / internal_reps * 1e9;
  { One decimal place for results under 10 ns, none otherwise. }
  writeln(name, ': ', time:0:ord(time < 10), ' ns/call');
end;
const
  { Arguments handed to every Benchmark run. }
  StartValue = 5000000;
  Addend = 1000;
var
  Results: array[0..2] of LongWord;
  FailureCode: Integer;
begin
  { Run the three kernels; slot 0 holds the Pascal control result. }
  Results[0] := Benchmark(' Pascal control case', @Checksum_PAS, StartValue, Addend);
  Results[1] := Benchmark(' Using LEA instruction', @Checksum_LEA, StartValue, Addend);
  Results[2] := Benchmark('Using ADD instructions', @Checksum_ADD, StartValue, Addend);

  { Build a bit mask of mismatches: bit 0 = LEA, bit 1 = ADD. }
  FailureCode := 0;

  if Results[1] <> Results[0] then
  begin
    WriteLn('ERROR: Checksum_LEA doesn''t match control case');
    FailureCode := FailureCode or 1;
  end;

  if Results[2] <> Results[0] then
  begin
    WriteLn('ERROR: Checksum_ADD doesn''t match control case');
    FailureCode := FailureCode or 2;
  end;

  { A non-zero mask becomes the process exit code. }
  if FailureCode <> 0 then
    Halt(FailureCode);
end.
_______________________________________________
fpc-devel maillist - fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel