[lazarus]

Vasily I. Volchenko Fri, 11 Jan 2008 07:35:23 -0800

Here is a patch against svn Lazarus. It enables the {%encoding xxx} mechanism. Some
of the changes are not very clean, but... It also enables a hack that allows
cp1251/koi8r LFM files to be used in gtk2 pseudo-UTF (Hint='{%encoding cp1251}'). That hack
works only partially on win32. Anyway, it is good for translating old/win32
projects. Of course, string constants in ShowMessage('.......') are still bad.

diff -u -r --minimal /home/vvi/svn/lazarus/ide/main.pp /home/vvi/src/lazarus1/ide/main.pp
--- /home/vvi/svn/lazarus/ide/main.pp	2008-01-04 09:45:19.000000000 +0000
+++ /home/vvi/src/lazarus1/ide/main.pp	2008-01-10 19:24:28.000000000 +0000
@@ -11047,14 +11047,21 @@
   const Filename: string; var Source, DiskEncoding, MemEncoding: string);
 begin
   //DebugLn(['TMainIDE.OnCodeBufferDecodeLoaded Filename=',Filename,' Encoding=',GuessEncoding(Source)]);
-  //DiskEncoding:=GuessEncoding(Source);
-  //MemEncoding:=EncodingUTF8;
+  DiskEncoding:=GuessEncoding(Source);
+  MemEncoding:=LazarusEncoding;
+  if DiskEncoding=MemEncoding then exit;
+  Source:=ConvertEncoding(Source,DiskEncoding,MemEncoding);
 end;
 
 procedure TMainIDE.OnCodeBufferEncodeSaving(Code: TCodeBuffer;
   const Filename: string; var Source: string);
+var
+  DiskEncoding: String;
 begin
-
+  DiskEncoding:=GuessEncoding(Source,true);
+  if DiskEncoding=EncodingNoTranslation then
+    exit;//This will be if no %encoding found in source
+  Source:=ConvertEncoding(Source,LazarusEncoding,DiskEncoding);
 end;
 
 procedure TMainIDE.CodeToolBossPrepareTree(Sender: TObject);
diff -u -r --minimal /home/vvi/svn/lazarus/ide/revision.inc /home/vvi/src/lazarus1/ide/revision.inc
--- /home/vvi/svn/lazarus/ide/revision.inc	2007-09-08 11:28:42.000000000 +0100
+++ /home/vvi/src/lazarus1/ide/revision.inc	2008-01-10 19:24:28.000000000 +0000
@@ -1,2 +1,2 @@
 // Created by Svn2RevisionInc
-const RevisionStr = '11846';
+const RevisionStr = '13612';
diff -u -r --minimal /home/vvi/svn/lazarus/lcl/lconvencoding.pas /home/vvi/src/lazarus1/lcl/lconvencoding.pas
--- /home/vvi/svn/lazarus/lcl/lconvencoding.pas	2007-12-31 12:08:13.000000000 +0000
+++ /home/vvi/src/lazarus1/lcl/lconvencoding.pas	2008-01-10 19:24:28.000000000 +0000
@@ -31,8 +31,21 @@
 
 const
   EncodingUTF8 = 'utf8';
-
-function GuessEncoding(const s: string): string;
+  LazarusEncoding = {$ifdef MSWindows}
+   {$ifdef WindowsUnicodeSupport}
+   EncodingUTF8
+   {$else}
+   'ANSI'
+   {$endif}
+  {$else}
+   {$ifndef LCLgtk}
+   EncodingUTF8
+   {$else}
+   'ANSI'
+   {$endif}
+  {$endif};
+  EncodingNoTranslation = 'no_translation';
+function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string;
 
 function ConvertEncoding(const s, FromEncoding, ToEncoding: string): string;
 
@@ -281,12 +294,13 @@
   end;
 end;
 
-function GuessEncoding(const s: string): string;
+function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string;
 var
   l: Integer;
   p: Integer;
   EndPos: LongInt;
   i: LongInt;
+  k: Int64;
   
   function CompareI(p1, p2: PChar; Count: integer): boolean;
   var
@@ -316,20 +330,29 @@
     Result:='';
     exit;
   end;
+  // try %encoding eee
+  // Placing %encoding in the beginning is a good idea, but this won't allow
+  // our "LFM-hacks". Yes, this is not so good, but it is the only reasonable
+  // way to make Lazarus really cross-platform before encoding unification
+  // Besides, file beginning can contain spaces, CR+LF,BOM or any other waste.
+  // Or just some autoinserted or beginning-requiring comments.
+  k:=pos('{'+'%encoding ',s);
   
-  // try BOM
-  if CompareI(@s[1],#$EF#$BB#$BF,3) then begin
-    Result:=EncodingUTF8;
-    exit;
-  end;
-  
-  // try {%encoding eee}
-  if CompareI(@s[1],'{%encoding ',11) then begin
+  if (k>0)and CompareI(@s[k],'{'+'%encoding ',11) then begin
     p:=12;
-    while (p<=l) and (s[p] in [' ',#9]) do inc(p);
+    while (p<=l) and (s[k+p-1] in [' ',#9]) do inc(p);
     EndPos:=p;
-    while (EndPos<=l) and (not (s[EndPos] in ['}',' ',#9])) do inc(EndPos);
-    Result:=copy(s,p,EndPos-p);
+    while (EndPos<=l) and (not (s[k+EndPos-1] in ['}',' ',#9,#13,#10])) do inc(EndPos);
+    Result:=copy(s,k+p-1,EndPos-p);
+    exit;
+  end;
+
+  // OK, if no encoding found, we shouldn't translate anyway
+  if OnlyForSaving then begin Result:=EncodingNoTranslation;exit;end;
+
+  // try BOM. It is placed AFTER %encoding because convenience for user
+  if CompareI(@s[1],#$EF#$BB#$BF,3) then begin
+    Result:=EncodingUTF8;
     exit;
   end;
   
@@ -358,8 +381,24 @@
 var AFrom,ATo:string;
     SL:TStringList;
     FN1,FN2:string;
+ function NormEncoding(const Enc:string):string;//Or make it separated function?
+ var
+   i: Integer;
+ begin
+   Result:=AnsiLowerCase(Enc);
+   i:=Pos('-',Result);
+   While i>0 do
+   begin
+     Delete(Result,i,1);
+     i:=Pos('-',Result);
+   end;
+ end;
 begin
   Result:=s;
+  if (FromEncoding=EncodingNoTranslation)or(ToEncoding=EncodingNoTranslation)
+   then exit;//Theoretically we shouldn''t be here, but this should work anyway
+  if NormEncoding(FromEncoding)=NormEncoding(ToEncoding) then exit;
+  
   AFrom:=LowerCase(FromEncoding);
   ATo:=LowerCase(ToEncoding);
   if AFrom=ATo then exit;
@@ -375,6 +414,8 @@
     if AFrom='cp1251' then begin Result:=Cp1251toUTF(s);exit;end;
     if AFrom='koi8-r' then begin Result:=Cp1251toUTF(Koi8rToCP1251(s));exit;end;
   end;
+  if (AFrom='ansi')and(ATo=EncodingUTF8) then begin Result:=AnsiToUtf8(s);exit; end;
+  if (AFrom=EncodingUTF8)and(ATo='ansi') then begin Result:=Utf8ToAnsi(s);exit; end;
   //Stupid code. Works anyway, but extra-slow
   {$ifdef Unix}
   DebugLn(['CPConvert NOTE: using slow iconv workaround to convert from ',AFrom,' to ',ATo]);

[lazarus]

Reply via email to