O/H ΘΕΟΦΙΛΟΣ ΦΩΤΟΠΟΥΛΟΣ έγραψε:
> It seems interesting.
>
> I'vre archived it, and will have a look when there is available time or
> mood...
>
> Thx and Well done..
I've forgotten that parser story...
Anyway, as I said in the previous message "I took the parsing code
from TStrings.SetDelimitedText() private method". Just wasted my
time. That parser was a veeeeeeeery slow one.
So, please Theofilos, do not use that previous version.
I wrote a brand new one which is as fast as possible. Here it is.
//=============================================================
unit cso_CSV;
interface
uses
SysUtils
,Classes
,Types
,DB
;
type
(*----------------------------------------------------------------------------
A very fast csv parser.
To use it call one of the two overloaded Parse() methods. The parser calls a
user provided call-back method for each line it parses. Also it can pass
parsed data to a user provided TDataset. That dataset must be already
created and Active.
Example:
Example:
Table := TClientDataSet.Create(Self);
Table.FieldDefs.Add('CODE' , ftString, 32);
Table.FieldDefs.Add('DATE' , ftString, 24);
Table.FieldDefs.Add('AMOUNT' , ftString, 16);
Table.FieldDefs.Add('DEBIT_CREDIT' , ftString, 8);
Table.FieldDefs.Add('CURRENCY_CODE' , ftString, 8);
Table.FieldDefs.Add('VAT_CODE' , ftString, 8);
Table.CreateDataSet();
Table.Active := True;
Parser.Parse('C:\FileName.csv', // FileName: string
6, // FieldCount: Integer - the
number of fields the file contains
';', // Delimiter: Char - field
delimiter
Form1.Parser_OnLine, // OnLine: TNotifyEvent - user
provided call back
True, // SkipFirstLine: Boolean - when
the first line contains field names
Table // Table: TDataset - could be nil
);
It is also possible to pass nil to the last parameter and use the call back
to access each parsed line. Here is an example
procedure TMainForm.Parser_OnLine(Sender: TObject);
var
i : Integer;
S : string;
begin
S := '';
for i := 0 to TCSVParser(Sender).FieldCount - 1 do
S := S + TCSVParser(Sender).Values[i] + '|';
Memo.Lines.Add(S);
end;
procedure TMainForm.btnExecuteClick(Sender: TObject);
const
LB = #13#10; { LineBreak }
cData = ' "a' + #13 + 'b"' + LB;
cData2 =
' "And", "when, the", "night is cloudy" ' + LB +
' "there", "is""' + #13 + 'still a light", "that shines on me" ' + LB +
' "shine until", "tomorrow", "let it be" ' + LB;
cData3 =
' And, when the, night is " cloudy ' + LB +
' there, is still a light, that " shines on me ' + LB +
' shine" until, tomorrow, let it be ' ;
var
SS : TStringStream;
MS : TMemoryStream;
Parser : TCSVParser;
begin
Memo.Clear;
SS := TStringStream.Create(cData3);
MS := TMemoryStream.Create;
try
MS.LoadFromStream(SS);
MS.Position := 0;
Parser := TCSVParser.Create;
try
Parser.Parse(MS.Memory, MS.Size, 3, ',', Parser_OnLine, False, nil);
finally
Parser.Free;
end;
finally
SS.Free;
MS.Free;
end;
end;
-----------------------------------------------------------------------------
There is NOT a CSV standard. The RFC 4180 is just an informational text
which "documents the format used for Comma-Separated Values (CSV) files
and registers the associated MIME type "text/csv". "
The RFC 4180 can be found at
http://www.rfc-editor.org/rfc/rfc4180.txt
http://tools.ietf.org/html/rfc4180
-----------------------------------------------------------------------------
Freeware, use it at your on risk.
Copyright © 2009 Theodoros Bebekis, Thessaloniki, Greece
(teo point bebekis at gmail point com)
----------------------------------------------------------------------------*)
TCSVParser = class(TObject)
private
FParserState : Integer;
FIsQuotedField : Boolean;
FSpaceCount : Integer;
FDelimiter : Char;
FBuffer : PChar;
FBufferPos : Integer;
FBufferSize : Integer;
FData : PChar;
FDataSize : Cardinal;
FDataPos : Cardinal;
FLineCount : Integer;
FFieldCount : Integer;
FValues : TStringDynArray;
FFieldIndex : Integer;
FIsJaggedLine : Boolean;
FOnLine : TNotifyEvent;
FSkipFirstLine : Boolean;
FTable : TDataset;
procedure Error(const Text: string);
public
procedure Parse(Data: Pointer; DataSize: Cardinal; FieldCount: Integer;
Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset
= nil); overload;
procedure Parse(const FileName: string; FieldCount: Integer; Delimiter:
Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset = nil);
overload;
property LineCount : Integer read FLineCount;
property FieldCount : Integer read FFieldCount;
property FieldIndex : Integer read FFieldIndex;
property IsJaggedLine : Boolean read FIsJaggedLine;
property Values : TStringDynArray read FValues;
end;
implementation
const
NUL = #0;
CR = #13 ;
LF = #10 ;
DQUOTE = '"' ;
{ curren field buffer block size }
const
BLOCK_SIZE = 128;
MAX_BLOCK_SIZE = 1024 * 8;
{ parser state }
const
PS_IN_NEW_LINE = 1;
PS_INSIDE_FIELD = 2;
PS_OUTSIDE_FIELD = 3;
{ TCSVParser }
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Error(const Text: string);
begin
raise Exception.CreateFmt('Error in line: %d. ' + Text, [FLineCount]);
end;
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Parse(Data: Pointer; DataSize: Cardinal; FieldCount:
Integer; Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table:
TDataset);
var
C : Char;
{---------------------------------------}
procedure ReallocBuffer;
begin
if FBufferSize + BLOCK_SIZE > MAX_BLOCK_SIZE then
Error('Field data too long');
FBuffer := ReallocMemory(FBuffer, FBufferSize + BLOCK_SIZE);
FBufferSize := FBufferSize + BLOCK_SIZE;
end;
{---------------------------------------}
function IsBlank(): Boolean;
begin
Result := C in [#1..#32]
end;
{---------------------------------------}
function IsEscapeQuote: Boolean;
begin
Result := False;
if (FDataSize > FDataPos) then
Result := FData[FDataPos + 1] = DQUOTE;
end;
{---------------------------------------}
procedure AddChar();
begin
FBuffer[FBufferPos] := C;
FBufferPos := FBufferPos + 1;
end;
{---------------------------------------}
procedure AddField();
begin
if not FIsQuotedField then
FBufferPos := FBufferPos - FSpaceCount;
SetString(FValues[FFieldIndex], FBuffer, FBufferPos);
FFieldIndex := FFieldIndex + 1;
FBufferPos := 0;
FSpaceCount := 0;
FIsQuotedField := False;
end;
{---------------------------------------}
procedure AddLine();
var
i : Integer;
begin
FIsJaggedLine := FFieldIndex <> FFieldCount - 1;
if FIsJaggedLine then
for i := FFieldIndex + 1 to FFieldCount - 1 do
FValues[i] := '';
if not (FSkipFirstLine and (FLineCount = 0)) then
begin
if Assigned(FTable) then
begin
FTable.Append;
FTable.Edit();
for i := 0 to FFieldIndex - 1 do
FTable.Fields[i].AsString := FValues[i];
FTable.Post();
end;
if Assigned(FOnLine) then
FOnLine(Self);
end;
FFieldIndex := 0;
FBufferPos := 0;
FSpaceCount := 0;
FIsQuotedField := False;
FLineCount := FLineCount + 1;
end;
{---------------------------------------}
begin
FData := Data;
FDataSize := DataSize;
FDataPos := 0;
FOnLine := OnLine;
FLineCount := 0;
FFieldCount := FieldCount;
SetLength(FValues, FieldCount);
FFieldIndex := 0;
FIsJaggedLine := False;
FSkipFirstLine := SkipFirstLine;
FTable := Table;
FDelimiter := Delimiter;
FSpaceCount := 0;
FIsQuotedField := False;
FParserState := PS_IN_NEW_LINE;
FBufferPos := 0;
ReallocBuffer();
try
while (FDataPos < FDataSize) do
begin
if (FBufferPos = FBufferSize) then
ReallocBuffer();
Inc(FDataPos);
C := FData[FDataPos];
if (C = NUL) then
Continue;
case FParserState of
PS_IN_NEW_LINE : if (C = FDelimiter) then
Error('New line can not start with Delimiter')
else if not IsBlank() then
begin
if (C = DQUOTE) then
FIsQuotedField := True
else
AddChar();
FParserState := PS_INSIDE_FIELD;
end;
{ inside a quoted field }
PS_INSIDE_FIELD : if FIsQuotedField then
begin
if (C = DQUOTE) then
begin
if IsEscapeQuote() then
begin
AddChar();
Inc(FDataPos); // skip the second quote
end else begin
AddField();
FParserState := PS_OUTSIDE_FIELD;
end;
end else
AddChar();
{ inside a non-quoted field }
end else begin
if (C in [CR, LF]) then
begin
AddField();
AddLine();
FParserState := PS_IN_NEW_LINE;
end else if (C = FDelimiter) then
begin
AddField();
FParserState := PS_OUTSIDE_FIELD;
end else
begin
AddChar();
if IsBlank() then
FSpaceCount := FSpaceCount + 1
else
FSpaceCount := 0;
end;
end;
PS_OUTSIDE_FIELD : if (C in [CR, LF]) then
begin
AddLine();
FParserState := PS_IN_NEW_LINE;
end else if (not IsBlank()) and (C <> FDelimiter)
then
begin
if (C = DQUOTE) then
FIsQuotedField := True
else
AddChar();
FParserState := PS_INSIDE_FIELD;
end;
end;
end;
finally
FreeMemory(FBuffer) ;
FBuffer := nil;
end;
end;
(*----------------------------------------------------------------------------*)
procedure TCSVParser.Parse(const FileName: string; FieldCount: Integer;
Delimiter: Char; OnLine: TNotifyEvent; SkipFirstLine: Boolean; Table: TDataset);
var
MS : TMemoryStream;
sNul : Char;
begin
if FileExists(FileName) then
begin
MS := TMemoryStream.Create;
try
MS.LoadFromFile(FileName);
MS.Position := 0;
if MS.Size > 0 then
begin
MS.Position := MS.Position + MS.Size;
sNul := NUL;
MS.WriteBuffer(sNul, 1);
MS.Position := 0;
Parse(MS.Memory, MS.Size, FieldCount, Delimiter, OnLine,
SkipFirstLine, Table);
end;
finally
MS.Free;
end;
end;
end;
end.
//=============================================================
--
Regards
Theo
------------------------
Theo Bebekis
Thessaloniki, Greece
------------------------
C# and Delphi tutorials at http://teo.bebekis.googlepages.com/
------------------------
Greek_Delphi_Prog : a greek Delphi list at
http://groups.yahoo.com/group/Greek_Delphi_Prog
CSharpDotNetGreek : a greek C# and .Net list at
http://groups.yahoo.com/group/CSharpDotNetGreek
atla_custom : an ALTEC Atlantis Customization list at
http://groups.yahoo.com/group/atla_custom
------------------------
------------------------------------
-----------------------------------------------------
Home page: http://groups.yahoo.com/group/delphi-en/
To unsubscribe: [email protected]! Groups Links
<*> To visit your group on the web, go to:
http://groups.yahoo.com/group/delphi-en/
<*> Your email settings:
Individual Email | Traditional
<*> To change settings online go to:
http://groups.yahoo.com/group/delphi-en/join
(Yahoo! ID required)
<*> To change settings via email:
mailto:[email protected]
mailto:[email protected]
<*> To unsubscribe from this group, send an email to:
[email protected]
<*> Your use of Yahoo! Groups is subject to:
http://docs.yahoo.com/info/terms/