Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc/CharEnt
In directory subversions:/tmp/cvs-serv368/lib/MHonArc/CharEnt

Added Files:
        BIG5_HKSCS.pm CP936.pm CP950.pm GB2312.pm 
Log Message:
* MHonArc::CharEnt:
  + Added support for Chinese charsets.
  + Added support for converting UTF-8.
  * Optimized the conversion code to use the s/// operator for conversion.
    Some simple testing shows it is faster than the loop/substr()
    method (mainly because character iteration is now done in the
    perl internals).
  - Removed the $8bitonly option to str2sgml().  It is not very applicable
    now with all the newer charsets added and it complicates the
    conversion code.
* Some additional charset aliases added.


--- NEW FILE ---

package MHonArc::CharEnt::BIG5_HKSCS;

# Hong Kong Chinese (BIG5-HKSCS):
#       Obtained from
#       <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5HKSCS.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0x8840 =>   '#xF303',       # CJK UNIFIED IDEOGRAPH
    0x8841 =>   '#xF304',       # CJK UNIFIED IDEOGRAPH
    0x8842 =>   '#xF305',       # CJK UNIFIED IDEOGRAPH
    0x8843 =>   '#xF306',       # CJK UNIFIED IDEOGRAPH
    0x8844 =>   '#xF307',       # CJK UNIFIED IDEOGRAPH
    0x8845 =>   '#xF308',       # CJK UNIFIED IDEOGRAPH
    0x8846 =>   '#xF309',       # CJK UNIFIED IDEOGRAPH
    0x8847 =>   '#xF30A',       # CJK UNIFIED IDEOGRAPH
    0x8848 =>   '#xF30B',       # CJK UNIFIED IDEOGRAPH
[...18128 lines suppressed...]
    0xFEEC =>   '#x8884',       # CJK UNIFIED IDEOGRAPH
    0xFEED =>   '#xE2FF',       # CJK UNIFIED IDEOGRAPH
    0xFEEE =>   '#xE300',       # CJK UNIFIED IDEOGRAPH
    0xFEEF =>   '#xE301',       # CJK UNIFIED IDEOGRAPH
    0xFEF0 =>   '#x7986',       # CJK UNIFIED IDEOGRAPH
    0xFEF1 =>   '#x8900',       # CJK UNIFIED IDEOGRAPH
    0xFEF2 =>   '#x6902',       # CJK UNIFIED IDEOGRAPH
    0xFEF3 =>   '#x7980',       # CJK UNIFIED IDEOGRAPH
    0xFEF4 =>   '#xE306',       # CJK UNIFIED IDEOGRAPH
    0xFEF5 =>   '#x799D',       # CJK UNIFIED IDEOGRAPH
    0xFEF6 =>   '#xE308',       # CJK UNIFIED IDEOGRAPH
    0xFEF7 =>   '#x793C',       # CJK UNIFIED IDEOGRAPH
    0xFEF8 =>   '#x79A9',       # CJK UNIFIED IDEOGRAPH
    0xFEF9 =>   '#x6E2A',       # CJK UNIFIED IDEOGRAPH
    0xFEFA =>   '#xE30C',       # CJK UNIFIED IDEOGRAPH
    0xFEFB =>   '#x3EA8',       # CJK UNIFIED IDEOGRAPH
    0xFEFC =>   '#x79C6',       # CJK UNIFIED IDEOGRAPH
    0xFEFD =>   '#xE30F',       # CJK UNIFIED IDEOGRAPH
    0xFEFE =>   '#x79D4',       # CJK UNIFIED IDEOGRAPH
};

--- NEW FILE ---

package MHonArc::CharEnt::CP936;

# Chinese cp936 (GBK)
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0x80 =>     '#x20AC',       # EURO SIGN
    0x8140 =>   '#x4E02',       # CJK UNIFIED IDEOGRAPH
    0x8141 =>   '#x4E04',       # CJK UNIFIED IDEOGRAPH
    0x8142 =>   '#x4E05',       # CJK UNIFIED IDEOGRAPH
    0x8143 =>   '#x4E06',       # CJK UNIFIED IDEOGRAPH
    0x8144 =>   '#x4E0F',       # CJK UNIFIED IDEOGRAPH
    0x8145 =>   '#x4E12',       # CJK UNIFIED IDEOGRAPH
    0x8146 =>   '#x4E17',       # CJK UNIFIED IDEOGRAPH
    0x8147 =>   '#x4E1F',       # CJK UNIFIED IDEOGRAPH
    0x8148 =>   '#x4E20',       # CJK UNIFIED IDEOGRAPH
    0x8149 =>   '#x4E21',       # CJK UNIFIED IDEOGRAPH
[...21762 lines suppressed...]
    0xFD9E =>   '#xF995',       # CJK COMPATIBILITY IDEOGRAPH
    0xFD9F =>   '#xF9E7',       # CJK COMPATIBILITY IDEOGRAPH
    0xFDA0 =>   '#xF9F1',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE40 =>   '#xFA0C',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE41 =>   '#xFA0D',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE42 =>   '#xFA0E',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE43 =>   '#xFA0F',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE44 =>   '#xFA11',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE45 =>   '#xFA13',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE46 =>   '#xFA14',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE47 =>   '#xFA18',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE48 =>   '#xFA1F',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE49 =>   '#xFA20',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4A =>   '#xFA21',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4B =>   '#xFA23',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4C =>   '#xFA24',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4D =>   '#xFA27',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4E =>   '#xFA28',       # CJK COMPATIBILITY IDEOGRAPH
    0xFE4F =>   '#xFA29',       # CJK COMPATIBILITY IDEOGRAPH
};

--- NEW FILE ---

package MHonArc::CharEnt::CP950;

# Chinese cp950 (BIG5):
#       Derived from cp950.txt from unicode.org and
#       <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA140 =>   '#x3000',       # IDEOGRAPHIC SPACE
    0xA141 =>   '#xFF0C',       # FULLWIDTH COMMA
    0xA142 =>   '#x3001',       # IDEOGRAPHIC COMMA
    0xA143 =>   '#x3002',       # IDEOGRAPHIC FULL STOP
    0xA144 =>   '#xFF0E',       # FULLWIDTH FULL STOP
    0xA145 =>   '#x2027',       # HYPHENATION POINT
    0xA146 =>   '#xFF1B',       # FULLWIDTH SEMICOLON
    0xA147 =>   '#xFF1A',       # FULLWIDTH COLON
    0xA148 =>   '#xFF1F',       # FULLWIDTH QUESTION MARK
[...13883 lines suppressed...]
    0xF9EC =>   '#x2558',       # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0xF9ED =>   '#x2567',       # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0xF9EE =>   '#x255B',       # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0xF9EF =>   '#x2553',       # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0xF9F0 =>   '#x2565',       # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0xF9F1 =>   '#x2556',       # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0xF9F2 =>   '#x255F',       # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0xF9F3 =>   '#x256B',       # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0xF9F4 =>   '#x2562',       # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0xF9F5 =>   '#x2559',       # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0xF9F6 =>   '#x2568',       # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0xF9F7 =>   '#x255C',       # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0xF9F8 =>   '#x2551',       # BOX DRAWINGS DOUBLE VERTICAL
    0xF9F9 =>   '#x2550',       # BOX DRAWINGS DOUBLE HORIZONTAL
    0xF9FA =>   '#x256D',       # BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
    0xF9FB =>   '#x256E',       # BOX DRAWINGS LIGHT ARC DOWN AND LEFT
    0xF9FC =>   '#x2570',       # BOX DRAWINGS LIGHT ARC UP AND RIGHT
    0xF9FD =>   '#x256F',       # BOX DRAWINGS LIGHT ARC UP AND LEFT
    0xF9FE =>   '#x2593',       # DARK SHADE
};

--- NEW FILE ---

package MHonArc::CharEnt::GB2312;

# Chinese GB2312
#       Derived from <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/GB2312.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1A1 =>   '#x3000',       # IDEOGRAPHIC SPACE
    0xA1A2 =>   '#x3001',       # IDEOGRAPHIC COMMA
    0xA1A3 =>   '#x3002',       # IDEOGRAPHIC FULL STOP
    0xA1A4 =>   '#x30FB',       # KATAKANA MIDDLE DOT
    0xA1A5 =>   '#x02C9',       # MODIFIER LETTER MACRON (Mandarin Chinese first tone)
    0xA1A6 =>   '#x02C7',       # CARON (Mandarin Chinese third tone)
    0xA1A7 =>   '#x00A8',       # DIAERESIS
    0xA1A8 =>   '#x3003',       # DITTO MARK
    0xA1A9 =>   '#x3005',       # IDEOGRAPHIC ITERATION MARK
    0xA1AA =>   '#x2015',       # HORIZONTAL BAR
[...7416 lines suppressed...]
    0xF7EC =>   '#x9EDB',       # CJK UNIFIED IDEOGRAPH
    0xF7ED =>   '#x9EDC',       # CJK UNIFIED IDEOGRAPH
    0xF7EE =>   '#x9EDD',       # CJK UNIFIED IDEOGRAPH
    0xF7EF =>   '#x9EE0',       # CJK UNIFIED IDEOGRAPH
    0xF7F0 =>   '#x9EDF',       # CJK UNIFIED IDEOGRAPH
    0xF7F1 =>   '#x9EE2',       # CJK UNIFIED IDEOGRAPH
    0xF7F2 =>   '#x9EE9',       # CJK UNIFIED IDEOGRAPH
    0xF7F3 =>   '#x9EE7',       # CJK UNIFIED IDEOGRAPH
    0xF7F4 =>   '#x9EE5',       # CJK UNIFIED IDEOGRAPH
    0xF7F5 =>   '#x9EEA',       # CJK UNIFIED IDEOGRAPH
    0xF7F6 =>   '#x9EEF',       # CJK UNIFIED IDEOGRAPH
    0xF7F7 =>   '#x9F22',       # CJK UNIFIED IDEOGRAPH
    0xF7F8 =>   '#x9F2C',       # CJK UNIFIED IDEOGRAPH
    0xF7F9 =>   '#x9F2F',       # CJK UNIFIED IDEOGRAPH
    0xF7FA =>   '#x9F39',       # CJK UNIFIED IDEOGRAPH
    0xF7FB =>   '#x9F37',       # CJK UNIFIED IDEOGRAPH
    0xF7FC =>   '#x9F3D',       # CJK UNIFIED IDEOGRAPH
    0xF7FD =>   '#x9F3E',       # CJK UNIFIED IDEOGRAPH
    0xF7FE =>   '#x9F44',       # CJK UNIFIED IDEOGRAPH
};

---------------------------------------------------------------------
To sign-off this list, send email to [EMAIL PROTECTED] with the
message text UNSUBSCRIBE MHONARC-DEV

Reply via email to