Here is a patch against Lazarus SVN. It enables the {%encoding xxx} mechanism. Some of the changes are not very good, but... It also enables a hack that allows cp1251/koi8r LFM files to be used with the gtk2 pseudo-UTF interface (Hint='{%encoding=cp1251}'). That hack works only partially on win32. Anyway, it is useful for translating old/win32 projects. Of course, string constants in ShowMessage('.......') are still a problem.
diff -u -r --minimal /home/vvi/svn/lazarus/ide/main.pp /home/vvi/src/lazarus1/ide/main.pp --- /home/vvi/svn/lazarus/ide/main.pp 2008-01-04 09:45:19.000000000 +0000 +++ /home/vvi/src/lazarus1/ide/main.pp 2008-01-10 19:24:28.000000000 +0000 @@ -11047,14 +11047,21 @@ const Filename: string; var Source, DiskEncoding, MemEncoding: string); begin //DebugLn(['TMainIDE.OnCodeBufferDecodeLoaded Filename=',Filename,' Encoding=',GuessEncoding(Source)]); - //DiskEncoding:=GuessEncoding(Source); - //MemEncoding:=EncodingUTF8; + DiskEncoding:=GuessEncoding(Source); + MemEncoding:=LazarusEncoding; + if DiskEncoding=MemEncoding then exit; + Source:=ConvertEncoding(Source,DiskEncoding,MemEncoding); end; procedure TMainIDE.OnCodeBufferEncodeSaving(Code: TCodeBuffer; const Filename: string; var Source: string); +var + DiskEncoding: String; begin - + DiskEncoding:=GuessEncoding(Source,true); + if DiskEncoding=EncodingNoTranslation then + exit;//This will be if no %encoding found in source + Source:=ConvertEncoding(Source,LazarusEncoding,DiskEncoding); end; procedure TMainIDE.CodeToolBossPrepareTree(Sender: TObject); diff -u -r --minimal /home/vvi/svn/lazarus/ide/revision.inc /home/vvi/src/lazarus1/ide/revision.inc --- /home/vvi/svn/lazarus/ide/revision.inc 2007-09-08 11:28:42.000000000 +0100 +++ /home/vvi/src/lazarus1/ide/revision.inc 2008-01-10 19:24:28.000000000 +0000 @@ -1,2 +1,2 @@ // Created by Svn2RevisionInc -const RevisionStr = '11846'; +const RevisionStr = '13612'; diff -u -r --minimal /home/vvi/svn/lazarus/lcl/lconvencoding.pas /home/vvi/src/lazarus1/lcl/lconvencoding.pas --- /home/vvi/svn/lazarus/lcl/lconvencoding.pas 2007-12-31 12:08:13.000000000 +0000 +++ /home/vvi/src/lazarus1/lcl/lconvencoding.pas 2008-01-10 19:24:28.000000000 +0000 @@ -31,8 +31,21 @@ const EncodingUTF8 = 'utf8'; - -function GuessEncoding(const s: string): string; + LazarusEncoding = {$ifdef MSWindows} + {$ifdef WindowsUnicodeSupport} + EncodingUTF8 + {$else} + 'ANSI' + {$endif} + {$else} + {$ifndef 
LCLgtk} + EncodingUTF8 + {$else} + 'ANSI' + {$endif} + {$endif}; + EncodingNoTranslation = 'no_translation'; +function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string; function ConvertEncoding(const s, FromEncoding, ToEncoding: string): string; @@ -281,12 +294,13 @@ end; end; -function GuessEncoding(const s: string): string; +function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string; var l: Integer; p: Integer; EndPos: LongInt; i: LongInt; + k: Int64; function CompareI(p1, p2: PChar; Count: integer): boolean; var @@ -316,20 +330,29 @@ Result:=''; exit; end; + // try %encoding eee + // Placing %encoding in the beginning is a good idea, but this won't allow + // our "LFM-hacks". Yes, this is not so good, but it is the only reasonable + // way to make Lazarus really cross-platform before encoding unification + // Besides, file beginning can contain spaces, CR+LF,BOM or any other waste. + // Or just some autoinserted or beginning-requiring comments. + k:=pos('{'+'%encoding ',s); - // try BOM - if CompareI(@s[1],#$EF#$BB#$BF,3) then begin - Result:=EncodingUTF8; - exit; - end; - - // try {%encoding eee} - if CompareI(@s[1],'{%encoding ',11) then begin + if (k>0)and CompareI(@s[k],'{'+'%encoding ',11) then begin p:=12; - while (p<=l) and (s[p] in [' ',#9]) do inc(p); + while (p<=l) and (s[k+p-1] in [' ',#9]) do inc(p); EndPos:=p; - while (EndPos<=l) and (not (s[EndPos] in ['}',' ',#9])) do inc(EndPos); - Result:=copy(s,p,EndPos-p); + while (EndPos<=l) and (not (s[k+EndPos-1] in ['}',' ',#9,#13,#10])) do inc(EndPos); + Result:=copy(s,k+p-1,EndPos-p); + exit; + end; + + // OK, if no encoding found, we shouldn't translate anyway + if OnlyForSaving then begin Result:=EncodingNoTranslation;exit;end; + + // try BOM. 
It is placed AFTER %encoding because convenience for user + if CompareI(@s[1],#$EF#$BB#$BF,3) then begin + Result:=EncodingUTF8; exit; end; @@ -358,8 +381,24 @@ var AFrom,ATo:string; SL:TStringList; FN1,FN2:string; + function NormEncoding(const Enc:string):string;//Or make it separated function? + var + i: Integer; + begin + Result:=AnsiLowerCase(Enc); + i:=Pos('-',Result); + While i>0 do + begin + Delete(Result,i,1); + i:=Pos('-',Result); + end; + end; begin Result:=s; + if (FromEncoding=EncodingNoTranslation)or(ToEncoding=EncodingNoTranslation) + then exit;//Theoretically we shouldn''t be here, but this should work anyway + if NormEncoding(FromEncoding)=NormEncoding(ToEncoding) then exit; + AFrom:=LowerCase(FromEncoding); ATo:=LowerCase(ToEncoding); if AFrom=ATo then exit; @@ -375,6 +414,8 @@ if AFrom='cp1251' then begin Result:=Cp1251toUTF(s);exit;end; if AFrom='koi8-r' then begin Result:=Cp1251toUTF(Koi8rToCP1251(s));exit;end; end; + if (AFrom='ansi')and(ATo=EncodingUTF8) then begin Result:=AnsiToUtf8(s);exit; end; + if (AFrom=EncodingUTF8)and(ATo='ansi') then begin Result:=Utf8ToAnsi(s);exit; end; //Stupid code. Works anyway, but extra-slow {$ifdef Unix} DebugLn(['CPConvert NOTE: using slow iconv workaround to convert from ',AFrom,' to ',ATo]);