Author: atsushi
Date: 2005-06-27 01:52:32 -0400 (Mon, 27 Jun 2005)
New Revision: 46534

Added:
   trunk/mcs/class/I18N/CJK/CP50221.cs
Modified:
   trunk/mcs/class/I18N/CJK/I18N.CJK.dll.sources
   trunk/mcs/class/I18N/Common/Handlers.cs
Log:
2005-06-27  Atsushi Enomoto  <[EMAIL PROTECTED]>

        * CJK/CP50221.cs : new file. Support iso-2022-jp.
        * CJK/I18N.CJK.dll.sources : added CP50221.cs
        * Common/Handlers.cs : added ENCiso_2022_jp to encoding type list.



Added: trunk/mcs/class/I18N/CJK/CP50221.cs
===================================================================
--- trunk/mcs/class/I18N/CJK/CP50221.cs 2005-06-27 03:58:36 UTC (rev 46533)
+++ trunk/mcs/class/I18N/CJK/CP50221.cs 2005-06-27 05:52:32 UTC (rev 46534)
@@ -0,0 +1,296 @@
+using System;
+using System.Text;
+
+namespace I18N.CJK
+{
+       /// <summary>
+       /// ISO-2022-JP encoding exposed as code page 50221.
+       /// </summary>
+       /// <remarks>
+       /// Each conversion method below creates a new CP50221Encoder or
+       /// CP50221Decoder, so no shift state is carried between calls.
+       /// </remarks>
+       public class CP50221Encoding : Encoding
+       {
+               // Not referenced by the members of this class; kept so that
+               // static initialisation is unchanged.
+               static JISConvert convert = JISConvert.Convert;
+
+               public override string BodyName {
+                       get { return "iso-2022-jp"; }
+               }
+
+               public override int CodePage {
+                       get { return 50221; }
+               }
+
+               public override string HeaderName {
+                       get { return "iso-2022-jp"; }
+               }
+
+               public override string WebName {
+                       get { return "csISO2022JP"; }
+               }
+
+               public override string EncodingName {
+                       get { return "Japanese (JIS-Allow 1 byte Kana)"; }
+               }
+
+               /// <summary>
+               /// Returns an upper bound on the bytes needed to encode
+               /// <paramref name="charCount"/> characters.
+               /// </summary>
+               /// <exception cref="ArgumentOutOfRangeException">
+               /// charCount is negative or the bound does not fit in an Int32.
+               /// </exception>
+               public override int GetMaxByteCount (int charCount)
+               {
+                       if (charCount < 0)
+                               throw new ArgumentOutOfRangeException ("charCount");
+
+                       // Worst case: every character needs a 3-byte escape
+                       // sequence followed by a 2-byte code, and the output
+                       // must finish with a 3-byte escape back to ASCII.
+                       long max = (long) charCount * 5 + 3;
+                       if (max > Int32.MaxValue)
+                               throw new ArgumentOutOfRangeException ("charCount");
+                       return (int) max;
+               }
+
+               /// <summary>
+               /// Returns an upper bound on the characters produced from
+               /// <paramref name="byteCount"/> bytes.
+               /// </summary>
+               public override int GetMaxCharCount (int byteCount)
+               {
+                       // no escape sequence: at most one character per byte
+                       return byteCount;
+               }
+
+               // Counts the bytes for a complete, flushed conversion.
+               public override int GetByteCount (char [] chars, int charIndex, int charCount)
+               {
+                       return new CP50221Encoder ().GetByteCount (chars, charIndex, charCount, true);
+               }
+
+               // Encodes with a fresh encoder and flushes back to ASCII at the end.
+               public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex)
+               {
+                       return new CP50221Encoder ().GetBytes (chars, charIndex, charCount, bytes, byteIndex, true);
+               }
+
+               // Counts the characters a fresh decoder would produce.
+               public override int GetCharCount (byte [] bytes, int index, int count)
+               {
+                       return new CP50221Decoder ().GetCharCount (bytes, index, count);
+               }
+
+               // Decodes with a fresh decoder that starts in ASCII mode.
+               public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
+               {
+                       return new CP50221Decoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
+               }
+       }
+
+       // Character set currently selected by an ISO-2022-JP escape sequence.
+       internal enum CP50221Mode
+       {
+               // Selected by ESC ( B.
+               ASCII = 0,
+               // Selected by ESC $ B; two bytes per character.
+               JISX0208 = 1,
+               // Selected by ESC ( I; one byte per character.
+               JISX0201 = 2
+       }
+
+       /// <summary>
+       /// Encoder that writes characters as ISO-2022-JP bytes, inserting
+       /// escape sequences whenever the required character set changes.
+       /// </summary>
+       internal class CP50221Encoder : Encoder
+       {
+               static JISConvert convert = JISConvert.Convert;
+
+               // Character set the output is currently in. It persists across
+               // GetBytes calls until a flush switches back to ASCII.
+               CP50221Mode m = CP50221Mode.ASCII;
+
+               // Determines the character set and code value used for ch.
+               // A value >= 0x100 is a two-byte code biased by 0x100; anything
+               // smaller is written as a single byte. Returns false for a
+               // character that is skipped as non-convertible.
+               private static bool Lookup (char ch, out CP50221Mode mode, out int value)
+               {
+                       if (ch >= 0x2010 && ch <= 0x9FA5) {
+                               // This range contains the bulk of the CJK set.
+                               // NOTE(review): a table entry below 0x100 is still
+                               // written as one byte in JIS X 0208 mode, as before;
+                               // presumably 0 marks an unmapped character - confirm
+                               // against JISConvert.
+                               int index = (ch - 0x2010) * 2;
+                               mode = CP50221Mode.JISX0208;
+                               value = ((int) (convert.cjkToJis [index])) |
+                                       (((int) (convert.cjkToJis [index + 1])) << 8);
+                               return true;
+                       }
+                       if (ch >= 0xFF01 && ch <= 0xFF60) {
+                               // This range contains extra characters.
+                               int index = (ch - 0xFF01) * 2;
+                               mode = CP50221Mode.JISX0208;
+                               value = ((int) (convert.extraToJis [index])) |
+                                       (((int) (convert.extraToJis [index + 1])) << 8);
+                               return true;
+                       }
+                       if (ch >= 0xFF61 && ch <= 0xFF9F) {
+                               // Half-width katakana. Written as the 7-bit code
+                               // 0x21..0x5F, the inverse of the decoder's
+                               // "byte + 0xFF40" mapping.
+                               mode = CP50221Mode.JISX0201;
+                               value = ch - 0xFF40;
+                               return true;
+                       }
+                       if (ch < 128) {
+                               mode = CP50221Mode.ASCII;
+                               value = (int) ch;
+                               return true;
+                       }
+                       mode = CP50221Mode.ASCII;
+                       value = 0;
+                       return false;
+               }
+
+               /// <summary>
+               /// Counts the bytes GetBytes would write for the given characters,
+               /// starting from the encoder's current mode. The encoder's own
+               /// state is not modified.
+               /// </summary>
+               public override int GetByteCount (char [] chars, int charIndex, int charCount, bool flush)
+               {
+                       // Work on a copy so that counting never disturbs a
+                       // conversion that is in progress.
+                       CP50221Mode mode = m;
+                       int end = charIndex + charCount;
+                       int byteCount = 0;
+
+                       for (int i = charIndex; i < end; i++) {
+                               CP50221Mode next;
+                               int value;
+                               if (!Lookup (chars [i], out next, out value))
+                                       continue; // skip non-convertible character
+
+                               if (next != mode) {
+                                       byteCount += 3; // escape sequence
+                                       mode = next;
+                               }
+                               byteCount += (value >= 0x100) ? 2 : 1;
+                       }
+                       // must end in ASCII mode
+                       if (flush && mode != CP50221Mode.ASCII)
+                               byteCount += 3;
+                       return byteCount;
+               }
+
+               // Writes the escape sequence that selects "next" when it differs
+               // from "cur". Returns false if it failed to add the required ESC
+               // because fewer than three bytes are left in the buffer.
+               private bool SwitchMode (byte [] bytes, ref int byteIndex,
+                       CP50221Mode cur, CP50221Mode next)
+               {
+                       if (cur == next)
+                               return true;
+                       if (bytes.Length - byteIndex < 3)
+                               return false;
+                       bytes [byteIndex++] = 0x1B;
+                       bytes [byteIndex++] = (byte) (next == CP50221Mode.JISX0208 ? 0x24 : 0x28);
+                       bytes [byteIndex++] = (byte) (next == CP50221Mode.JISX0201 ? 0x49 : 0x42);
+                       return true;
+               }
+
+               /// <summary>
+               /// Encodes characters into <paramref name="bytes"/> and returns the
+               /// number of bytes written. Encoding stops early, without writing a
+               /// partial character, when the next character (including any escape
+               /// sequence it needs) does not fit in the buffer.
+               /// </summary>
+               public override int GetBytes (char [] chars, int charIndex, int charCount, byte [] bytes, int byteIndex, bool flush)
+               {
+                       int start = byteIndex;
+                       int end = charIndex + charCount;
+
+                       for (int i = charIndex; i < end; i++) {
+                               CP50221Mode next;
+                               int value;
+                               if (!Lookup (chars [i], out next, out value))
+                                       continue; // skip non-convertible character
+
+                               bool wide = value >= 0x100;
+                               int needed = (next != m ? 3 : 0) + (wide ? 2 : 1);
+                               if (bytes.Length - byteIndex < needed)
+                                       break; // output buffer exhausted
+
+                               SwitchMode (bytes, ref byteIndex, m, next);
+                               m = next;
+
+                               if (wide) {
+                                       // Convert the 0-based cell index to row/cell bytes.
+                                       value -= 0x0100;
+                                       bytes [byteIndex++] = (byte) (value / 94 + 33);
+                                       bytes [byteIndex++] = (byte) (value % 94 + 33);
+                               }
+                               else
+                                       bytes [byteIndex++] = (byte) value;
+                       }
+                       // must end in ASCII mode; keep the current mode if the
+                       // escape sequence could not be written.
+                       if (flush && SwitchMode (bytes, ref byteIndex, m, CP50221Mode.ASCII))
+                               m = CP50221Mode.ASCII;
+                       return byteIndex - start;
+               }
+       }
+
+       /// <summary>
+       /// Decoder that turns ISO-2022-JP bytes into characters. Every call
+       /// starts in ASCII mode; no state is kept between calls.
+       /// </summary>
+       internal class CP50221Decoder : Decoder
+       {
+               static JISConvert convert = JISConvert.Convert;
+
+               // Interprets the two bytes that follow ESC and returns the mode
+               // they select. Throws ArgumentException for any other sequence.
+               private static CP50221Mode ParseEscape (byte first, byte second)
+               {
+                       bool wide;
+                       if (first == 0x24)
+                               wide = true;
+                       else if (first == 0x28)
+                               wide = false;
+                       else
+                               throw new ArgumentException ("Unexpected ISO-2022-JP escape sequence.");
+
+                       if (second == 0x42)
+                               return wide ? CP50221Mode.JISX0208 : CP50221Mode.ASCII;
+                       if (second == 0x49)
+                               return CP50221Mode.JISX0201;
+                       throw new ArgumentException (String.Format ("Unexpected ISO-2022-JP escape sequence. Ended with 0x{0:X04}", second));
+               }
+
+               /// <summary>
+               /// Counts the characters GetChars would produce for the same bytes
+               /// when given a large enough output buffer.
+               /// </summary>
+               /// <exception cref="ArgumentException">
+               /// The input contains an unsupported escape sequence.
+               /// </exception>
+               public override int GetCharCount (byte [] bytes, int index, int count)
+               {
+                       CP50221Mode mode = CP50221Mode.ASCII;
+                       int ret = 0;
+
+                       int end = index + count;
+                       for (int i = index; i < end; i++) {
+                               if (bytes [i] == 0x1B) {
+                                       if (i + 2 >= end)
+                                               break; // incomplete escape sequence
+                                       mode = ParseEscape (bytes [i + 1], bytes [i + 2]);
+                                       i += 2;
+                                       continue;
+                               }
+                               if (mode == CP50221Mode.JISX0208) {
+                                       if (i + 1 == end)
+                                               break; // incomplete head of wide char
+                                       i++; // two bytes make one character
+                               }
+                               ret++;
+                       }
+
+                       return ret;
+               }
+
+               // Looks up a character in the JIS X 0208 table, which stores two
+               // bytes (low byte first) per entry. Returns '?' when the index is
+               // outside the table.
+               private char ToChar (int value)
+               {
+                       value <<= 1;
+                       if (value < 0 || value + 1 >= convert.jisx0208ToUnicode.Length)
+                               return '?';
+                       return (char) (((int) (convert.jisx0208ToUnicode [value])) |
+                               (((int) (convert.jisx0208ToUnicode [value + 1])) << 8));
+               }
+
+               /// <summary>
+               /// Decodes bytes into <paramref name="chars"/> and returns the number
+               /// of characters written. Decoding stops when the output array is
+               /// full or when the input ends inside an escape sequence or a
+               /// two-byte character.
+               /// </summary>
+               /// <exception cref="ArgumentException">
+               /// The input contains an unsupported escape sequence.
+               /// </exception>
+               public override int GetChars (byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
+               {
+                       CP50221Mode m = CP50221Mode.ASCII;
+                       int start = charIndex;
+                       int end = byteIndex + byteCount;
+                       for (int i = byteIndex; i < end && charIndex < chars.Length; i++) {
+                               if (bytes [i] == 0x1B) {
+                                       if (i + 2 >= end)
+                                               break; // incomplete escape sequence
+                                       m = ParseEscape (bytes [i + 1], bytes [i + 2]);
+                                       i += 2;
+                                       continue;
+                               }
+
+                               if (m == CP50221Mode.JISX0208) {
+                                       if (i + 1 == end)
+                                               break; // incomplete head of wide char
+
+                                       // Reuses the JIS to Shift_JIS arithmetic to
+                                       // build the table index.
+                                       // NOTE(review): for lead bytes above 0x5E this
+                                       // yields s1 >= 0xE0; confirm that the table
+                                       // layout expects (s1 - 0x81) for those rows.
+                                       int s1 = ((bytes [i] - 1) >> 1) + ((bytes [i] <= 0x5e) ? 0x71 : 0xb1);
+                                       int s2 = bytes [i + 1] + (((bytes [i] & 1) != 0) ? 0x20 : 0x7e);
+                                       int v = (s1 - 0x81) * 0xBC;
+                                       v += s2 - 0x41;
+
+                                       chars [charIndex++] = ToChar (v);
+                                       i++;
+                               }
+                               else if (m == CP50221Mode.JISX0201)
+                                       chars [charIndex++] = (char) (bytes [i] + 0xFF40);
+                               else
+                                       chars [charIndex++] = (char) bytes [i];
+                       }
+
+                       return charIndex - start;
+               }
+       }
+
+       // Name-based variant of CP50221Encoding; it adds no members of its own.
+       public class ENCiso_2022_jp : CP50221Encoding
+       {
+               public ENCiso_2022_jp ()
+               {
+               }
+       } // class ENCiso_2022_jp
+}


Property changes on: trunk/mcs/class/I18N/CJK/CP50221.cs
___________________________________________________________________
Name: svn:eol-style
   + native

Modified: trunk/mcs/class/I18N/CJK/I18N.CJK.dll.sources
===================================================================
--- trunk/mcs/class/I18N/CJK/I18N.CJK.dll.sources       2005-06-27 03:58:36 UTC 
(rev 46533)
+++ trunk/mcs/class/I18N/CJK/I18N.CJK.dll.sources       2005-06-27 05:52:32 UTC 
(rev 46534)
@@ -1,6 +1,7 @@
 ../Assembly/AssemblyInfo.cs
 Big5Convert.cs
 CP932.cs
+CP50221.cs
 CP51932.cs
 CP936.cs
 CP949.cs

Modified: trunk/mcs/class/I18N/Common/Handlers.cs
===================================================================
--- trunk/mcs/class/I18N/Common/Handlers.cs     2005-06-27 03:58:36 UTC (rev 
46533)
+++ trunk/mcs/class/I18N/Common/Handlers.cs     2005-06-27 05:52:32 UTC (rev 
46534)
@@ -39,10 +39,12 @@
         "I18N.CJK.CP936",
         "I18N.CJK.CP949",
         "I18N.CJK.CP950",
+        "I18N.CJK.CP50221",
         "I18N.CJK.CP51932",
         "I18N.CJK.ENCbig5",
         "I18N.CJK.ENCgb2312",
         "I18N.CJK.ENCshift_jis",
+        "I18N.CJK.ENCiso_2022_jp",
         "I18N.CJK.ENCeuc_jp",
         "I18N.CJK.ENCuhc",
         "I18N.MidEast.CP1254",

_______________________________________________
Mono-patches maillist  -  [email protected]
http://lists.ximian.com/mailman/listinfo/mono-patches

Reply via email to