Jeff Pohlmeyer schreef:
this kludge is about 25% faster than your perl script
on my machine....

Nope. It's still more or less twice slower. :-D


I guess it depends on the hardware:

% time koleksi.pl   # perl
Word count: 126944
Unique word count: 11793

real    0m1.019s
user    0m0.992s
sys     0m0.028s


% time koleksi   # fpc
Word count:126944
Unique word count:11793

real    0m0.817s
user    0m0.784s
sys     0m0.020s


AMD-K6-700 / SuSE-10.3 / Linux-2.6.22  / perl-5.8.8 / fpc-2.2.0



Thanks Jeff, for writing that parser code, I am not good in doing that.

I made it three times as fast on my computer (windows 2000, fpc 2.3.1, P4 1.5 Ghz) using a hashlist for the unique word count. Using a larger textbuf gave an additional 10% speed up:

program project1;
{$MODE OBJFPC} {$H+}

uses classes, strings, contnrs;

const
  bufsize = $1FFF;

var
  f: text;
  s:ansistring;
  wc:longint=0;
  wl:TStringList;
  uhl: TFPStringHashTable;
  i,n:LongInt;
  textbuf: array[0..bufsize-1] of byte;

begin
  assign(f, 'Koleksi.dat');
  reset(f);
  SetTextBuf(f, textbuf, sizeof(textbuf));
  wl:=TStringList.Create();
  uhl:=TFPStringHashTable.Create;
  while not eof(f) do begin
    readln(f,s);
    n:=length(s);
    if (n>0) then begin
    StrLower(@s[1]);
      if (s[1]='<') then begin
        if StrLComp(@s[1], '<title>',7) = 0 then begin
          delete(s,1,7);
        end else continue;
      end;
      for i:=1 to n do if not (s[i] in ['a'..'z','0'..'9']) then begin
        if ( s[i] <> '<' ) then begin
          s[i]:=#10
        end else begin
          s[i]:=#0;
          SetLength(s,StrLen(@s[1]));
          break;
        end;
      end;
      wl.Text:=s;
      for i:=0 to wl.Count-1 do begin
        s:=wl[i];
        for n:=1 to length(s) do if (s[n] in ['0'..'9']) then begin
          s:='';
          break;
        end;
        if (s<>'') then begin
          inc(wc);
          if uhl.Find(s) = nil then
            uhl.Add(s,'');
        end;
      end;
    end;
  end;
  close(f);
  WriteLn('Word count:',wc, #10'Unique word count:', uhl.Count);
end.
_______________________________________________
fpc-pascal maillist  -  fpc-pascal@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-pascal

Reply via email to