O/H ΘΕΟΦΙΛΟΣ ΦΩΤΟΠΟΥΛΟΣ έγραψε:
> It seems interesting. 
> 
> I'vre archived it, and will have a look when there is available time or 
> mood...
> 
> Thx and Well done..

I've forgotten that parser story...

Anyway, as I said in the previous message "I took the parsing code
from TStrings.SetDelimitedText() private method". Just wasted my
time. That parser was a veeeeeeeery slow one.

So, please Theofilos, do not use that previous version.

I wrote a brand new one which is as fast as possible. Here it is.

//=============================================================

unit cso_CSV;

interface

uses
    SysUtils
   ,Classes
   ,Types
   ,DB
   ;

type
(*----------------------------------------------------------------------------
  A very fast csv parser.

  To use it call one of the two overloaded Parse() methods. The parser calls a
  user provided call-back method for each line it parses. Also it can pass
  parsed data to a user provided TDataset. That dataset must be already
  created and Active.

  Example:
      Example:
        Table := TClientDataSet.Create(Self);
        Table.FieldDefs.Add('CODE'             , ftString, 32);
        Table.FieldDefs.Add('DATE'             , ftString, 24);
        Table.FieldDefs.Add('AMOUNT'           , ftString, 16);
        Table.FieldDefs.Add('DEBIT_CREDIT'     , ftString, 8);
        Table.FieldDefs.Add('CURRENCY_CODE'    , ftString, 8);
        Table.FieldDefs.Add('VAT_CODE'         , ftString, 8);
        Table.CreateDataSet();
        Table.Active := True;


        Parser.Parse('C:\FileName.csv',       // FileName: string
               6,                             // FieldCount: Integer - the 
number of fields the file contains
               ';',                           // Delimiter: Char - field 
delimiter
               Form1.Parser_OnLine,           // OnLine: TNotifyEvent - user 
provided call back
               True,                          // SkipFirstLine: Boolean - when 
the first line contains field names
               Table                          // Table: TDataset - could be nil
               );

  It is also possible to pass nil to the last parameter and use the call back
  to access each parsed line. Here is an example

    procedure TMainForm.Parser_OnLine(Sender: TObject);
    var
      i : Integer;
      S : string;
    begin
      S := '';
      for i := 0 to TCSVParser(Sender).FieldCount - 1 do
        S := S + TCSVParser(Sender).Values[i] + '|';
      Memo.Lines.Add(S);
    end;

    procedure TMainForm.btnExecuteClick(Sender: TObject);
    const
       LB    = #13#10;  { LineBreak }

       cData = ' "a' + #13 + 'b"' + LB;

       cData2 =
       '  "And", "when, the", "night is cloudy"  '  + LB +
       '  "there", "is""' + #13 + 'still a light", "that shines on me" '  + LB +
       '  "shine until", "tomorrow", "let it be"  '  + LB;

       cData3 =
       '  And, when the, night is " cloudy '  + LB +
       '  there, is still a light,   that " shines on me '  + LB +
       '  shine" until,   tomorrow, let  it be  ' ;
    var
      SS     : TStringStream;
      MS     : TMemoryStream;
      Parser : TCSVParser;
    begin
      Memo.Clear;

      SS := TStringStream.Create(cData3);
      MS := TMemoryStream.Create;
      try
        MS.LoadFromStream(SS);
        MS.Position := 0;

        Parser := TCSVParser.Create;
        try
          Parser.Parse(MS.Memory, MS.Size, 3, ',', Parser_OnLine, False, nil);
        finally
          Parser.Free;
        end;
      finally
        SS.Free;
        MS.Free;
      end;

    end;

  -----------------------------------------------------------------------------
  There is NOT a CSV standard. The RFC 4180 is just an informational text
  which "documents the format used for Comma-Separated Values (CSV) files
  and registers the associated MIME type "text/csv". "

  The RFC 4180 can be found at
    http://www.rfc-editor.org/rfc/rfc4180.txt
    http://tools.ietf.org/html/rfc4180
  -----------------------------------------------------------------------------
  Freeware, use it at your on risk.

  Copyright © 2009 Theodoros Bebekis, Thessaloniki, Greece
  (teo point bebekis at gmail point com)
  ----------------------------------------------------------------------------*)
   TCSVParser = class(TObject)
   private
     FParserState         : Integer;
     FIsQuotedField       : Boolean;
     FSpaceCount          : Integer;
     FDelimiter           : Char;

     FBuffer              : PChar;
     FBufferPos           : Integer;
     FBufferSize          : Integer;

     FData                : PChar;
     FDataSize            : Cardinal;
     FDataPos             : Cardinal;

     FLineCount           : Integer;
     FFieldCount          : Integer;
     FValues              : TStringDynArray;
     FFieldIndex          : Integer;
     FIsJaggedLine        : Boolean;

     FOnLine              : TNotifyEvent;

     FSkipFirstLine       : Boolean;
     FTable               : TDataset;

     procedure Error(const Text: string);
   public
     procedure Parse(Data: Pointer; DataSize: Cardinal; FieldCount: Integer; 
Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset 
= nil); overload;
     procedure Parse(const FileName: string; FieldCount: Integer; Delimiter: 
Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset = nil); 
overload;

     property LineCount      : Integer read FLineCount;
     property FieldCount     : Integer read FFieldCount;
     property FieldIndex     : Integer read FFieldIndex;
     property IsJaggedLine   : Boolean read FIsJaggedLine;
     property Values         : TStringDynArray read FValues;
   end;


implementation


const
   NUL          = #0;
   CR           = #13 ;
   LF           = #10 ;
   DQUOTE       = '"' ;

   { curren field buffer block size }
const
   BLOCK_SIZE         = 128;
   MAX_BLOCK_SIZE     = 1024 * 8;


   { parser state }
const
   PS_IN_NEW_LINE      = 1;
   PS_INSIDE_FIELD     = 2;
   PS_OUTSIDE_FIELD    = 3;


{ TCSVParser }
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Error(const Text: string);
begin
   raise Exception.CreateFmt('Error in line: %d. ' + Text, [FLineCount]);
end;
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Parse(Data: Pointer; DataSize: Cardinal; FieldCount: 
Integer; Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: 
TDataset);
var
   C      : Char;
   {---------------------------------------}
   procedure ReallocBuffer;
   begin
     if FBufferSize + BLOCK_SIZE >  MAX_BLOCK_SIZE then
       Error('Field data too long');

     FBuffer      := ReallocMemory(FBuffer, FBufferSize + BLOCK_SIZE);
     FBufferSize  := FBufferSize + BLOCK_SIZE;
   end;
   {---------------------------------------}
   function IsBlank(): Boolean;
   begin
     Result := C in [#1..#32]
   end;
   {---------------------------------------}
   function IsEscapeQuote: Boolean;
   begin
     Result := False;
     if (FDataSize > FDataPos) then
       Result := FData[FDataPos + 1] = DQUOTE;
   end;
   {---------------------------------------}
   procedure AddChar();
   begin
     FBuffer[FBufferPos] := C;
     FBufferPos := FBufferPos + 1;
   end;
   {---------------------------------------}
   procedure AddField();
   begin
     if not FIsQuotedField then
       FBufferPos := FBufferPos - FSpaceCount;

     SetString(FValues[FFieldIndex], FBuffer, FBufferPos);
     FFieldIndex := FFieldIndex + 1;

     FBufferPos      := 0;
     FSpaceCount     := 0;
     FIsQuotedField  := False;
   end;
   {---------------------------------------}
   procedure AddLine();
   var
     i : Integer;
   begin
     FIsJaggedLine := FFieldIndex <> FFieldCount - 1;

     if FIsJaggedLine then
       for i := FFieldIndex + 1 to FFieldCount - 1 do
         FValues[i] := '';

     if not (FSkipFirstLine and (FLineCount = 0)) then
     begin
       if Assigned(FTable) then
       begin
         FTable.Append;
         FTable.Edit();
         for i := 0 to FFieldIndex - 1 do
           FTable.Fields[i].AsString := FValues[i];
         FTable.Post();
       end;

       if Assigned(FOnLine) then
         FOnLine(Self);
     end;

     FFieldIndex := 0;

     FBufferPos      := 0;
     FSpaceCount     := 0;
     FIsQuotedField  := False;
     FLineCount      := FLineCount + 1;
   end;
   {---------------------------------------}
begin
   FData          := Data;
   FDataSize      := DataSize;
   FDataPos       := 0;

   FOnLine        := OnLine;

   FLineCount     := 0;
   FFieldCount    := FieldCount;
   SetLength(FValues, FieldCount);
   FFieldIndex    := 0;
   FIsJaggedLine  := False;
   FSkipFirstLine := SkipFirstLine;
   FTable         := Table;

   FDelimiter     := Delimiter;
   FSpaceCount    := 0;
   FIsQuotedField := False;
   FParserState   := PS_IN_NEW_LINE;

   FBufferPos     := 0;
   ReallocBuffer();

   try
     while (FDataPos < FDataSize) do
     begin

       if (FBufferPos = FBufferSize) then
         ReallocBuffer();

       Inc(FDataPos);
       C := FData[FDataPos];

       if (C = NUL) then
         Continue;

       case FParserState of
         PS_IN_NEW_LINE    : if (C = FDelimiter) then
                               Error('New line can not start with Delimiter')
                             else if not IsBlank() then
                             begin
                               if (C = DQUOTE) then
                                 FIsQuotedField := True
                               else
                                 AddChar();
                               FParserState := PS_INSIDE_FIELD;
                             end;

                             { inside a quoted field }
         PS_INSIDE_FIELD   : if FIsQuotedField then
                             begin
                               if (C = DQUOTE) then
                               begin
                                 if IsEscapeQuote() then
                                 begin
                                   AddChar();
                                   Inc(FDataPos);  // skip the second quote
                                 end else begin
                                   AddField();
                                   FParserState := PS_OUTSIDE_FIELD;
                                 end;
                               end else
                                 AddChar();

                             { inside a non-quoted field }
                             end else begin
                               if (C in [CR, LF]) then
                               begin
                                 AddField();
                                 AddLine();
                                 FParserState := PS_IN_NEW_LINE;
                               end else if (C = FDelimiter) then
                               begin
                                 AddField();
                                 FParserState := PS_OUTSIDE_FIELD;
                               end else
                               begin
                                 AddChar();
                                 if IsBlank() then
                                   FSpaceCount := FSpaceCount + 1
                                 else
                                   FSpaceCount := 0;
                               end;

                             end;
         PS_OUTSIDE_FIELD  : if (C in [CR, LF]) then
                             begin
                               AddLine();
                               FParserState := PS_IN_NEW_LINE;
                             end else if (not IsBlank()) and (C <> FDelimiter) 
then
                             begin
                               if (C = DQUOTE) then
                                 FIsQuotedField := True
                               else
                                 AddChar();
                               FParserState := PS_INSIDE_FIELD;
                             end;


       end;
     end;
   finally
     FreeMemory(FBuffer) ;
     FBuffer := nil;
   end;

end;
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Parse(const FileName: string; FieldCount: Integer; 
Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset);
var
   MS   : TMemoryStream;
   sNul : Char;
begin

   if FileExists(FileName) then
   begin
     MS := TMemoryStream.Create;
     try
       MS.LoadFromFile(FileName);
       MS.Position := 0;

       if MS.Size > 0 then
       begin
         MS.Position := MS.Position + MS.Size;
         sNul := NUL;
         MS.WriteBuffer(sNul, 1);
         MS.Position := 0;

         Parse(MS.Memory, MS.Size, FieldCount, Delimiter, OnLine, 
SkipFirstLine, Table);
       end;
     finally
       MS.Free;
     end;
   end;

end;

end.


//=============================================================


-- 
Regards
Theo
------------------------
Theo Bebekis
Thessaloniki, Greece
------------------------
C# and Delphi tutorials at http://teo.bebekis.googlepages.com/
------------------------
Greek_Delphi_Prog : a greek Delphi list at 
http://groups.yahoo.com/group/Greek_Delphi_Prog
CSharpDotNetGreek : a greek C# and .Net list at 
http://groups.yahoo.com/group/CSharpDotNetGreek
atla_custom : an ALTEC Atlantis Customization list at 
http://groups.yahoo.com/group/atla_custom
------------------------


------------------------------------

-----------------------------------------------------
Home page: http://groups.yahoo.com/group/delphi-en/
To unsubscribe: [email protected]! Groups Links

<*> To visit your group on the web, go to:
    http://groups.yahoo.com/group/delphi-en/

<*> Your email settings:
    Individual Email | Traditional

<*> To change settings online go to:
    http://groups.yahoo.com/group/delphi-en/join
    (Yahoo! ID required)

<*> To change settings via email:
    mailto:[email protected] 
    mailto:[email protected]

<*> To unsubscribe from this group, send an email to:
    [email protected]

<*> Your use of Yahoo! Groups is subject to:
    http://docs.yahoo.com/info/terms/

Reply via email to