mbstring encodings.xml reference.xml

Moriyoshi Koizumi Sat, 13 Mar 2004 15:33:11 -0800

moriyoshi               Sat Mar 13 18:33:08 2004 EDT

  Added files:                 
    /phpdoc/en/reference/mbstring       encodings.xml


  Modified files:              
    /phpdoc/en/reference/mbstring       reference.xml 
  Log:
  - Add "summaries of supported encodings" section. Be sure to rerun "configure".
    TODO: maybe I'm not using <segmentedlist> correctly. docbook experts there?
  - Updated the location of cjk.inf
  - Correct some silly typos.

http://cvs.php.net/diff.php/phpdoc/en/reference/mbstring/reference.xml?r1=1.18&r2=1.19&ty=u
Index: phpdoc/en/reference/mbstring/reference.xml
diff -u phpdoc/en/reference/mbstring/reference.xml:1.18 
phpdoc/en/reference/mbstring/reference.xml:1.19
--- phpdoc/en/reference/mbstring/reference.xml:1.18     Sat Mar 13 08:34:51 2004
+++ phpdoc/en/reference/mbstring/reference.xml  Sat Mar 13 18:33:07 2004
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="iso-8859-1"?>
-<!-- $Revision: 1.18 $ -->
+<!-- $Revision: 1.19 $ -->
  <reference id="ref.mbstring">
-  <title>Multi-Byte String Functions</title> 
-  <titleabbrev>Multi-Byte String</titleabbrev>
+  <title>Multibyte String Functions</title> 
+  <titleabbrev>Multibyte String</titleabbrev>
   <partintro>
 
    <section id="mbstring.intro">
@@ -110,7 +110,6 @@
        scanner and the character encoding.
       </para>
      </note>
-
      <note>
       <para>
        If you have some database connected with PHP, it is recommended that
@@ -148,13 +147,13 @@
      </para>
      <note>
       <para>
-       In PHP 4.3.2 or earlier versions, <literal>mbstring</literal>
-       there is a limitation in this functionality that
-       <literal>mbstring</literal> does not perform character encoding
-       conversion in POST data if the <literal>enctype</literal> attribute in
-       the <literal>form</literal> element is set to
-       <literal>multipart/form-data</literal>. So you have to convert
-       the incoming data by yourself in this case if necessary.
+       In PHP 4.3.2 or earlier versions, there was a limitation in this
+          functionality that <literal>mbstring</literal> does not perform
+          character encoding conversion in POST data if the
+          <literal>enctype</literal> attribute in the <literal>form</literal>
+          element is set to <literal>multipart/form-data</literal>.
+          So you have to convert the incoming data by yourself in this case
+          if necessary.
       </para>
       <para>
        Beginning with PHP 4.3.3, if <literal>enctype</literal> for HTML form is
@@ -257,300 +256,306 @@
      </para>
    </section>
 
-   <section id="mbstring.encodings">
-     <title>Supported Character Encodings</title>
-     <simpara>
-      Currently the following character encodings are supported by the
-      <literal>mbstring</literal> module. Any of those Character encodings
-      can be specified in the <literal>encoding</literal> parameter of
-      <literal>mbstring</literal> functions.
-     </simpara>
-     <para>
-      The following character encoding is supported in this PHP
-      extension: 
-     </para>
-     <itemizedlist>
-      <listitem><simpara>UCS-4</simpara></listitem>
-      <listitem><simpara>UCS-4BE</simpara></listitem>
-      <listitem><simpara>UCS-4LE</simpara></listitem>
-      <listitem><simpara>UCS-2</simpara></listitem>
-      <listitem><simpara>UCS-2BE</simpara></listitem>
-      <listitem><simpara>UCS-2LE</simpara></listitem>
-      <listitem><simpara>UTF-32</simpara></listitem>
-      <listitem><simpara>UTF-32BE</simpara></listitem>
-      <listitem><simpara>UTF-32LE</simpara></listitem>
-      <listitem><simpara>UTF-16</simpara></listitem>
-      <listitem><simpara>UTF-16BE</simpara></listitem>
-      <listitem><simpara>UTF-16LE</simpara></listitem>
-      <listitem><simpara>UTF-7</simpara></listitem>
-      <listitem><simpara>UTF7-IMAP</simpara></listitem>
-      <listitem><simpara>UTF-8</simpara></listitem>
-      <listitem><simpara>ASCII</simpara></listitem>
-      <listitem><simpara>EUC-JP</simpara></listitem>
-      <listitem><simpara>SJIS</simpara></listitem>
-      <listitem><simpara>eucJP-win</simpara></listitem>
-      <listitem><simpara>SJIS-win</simpara></listitem>
-      <listitem><simpara>ISO-2022-JP</simpara></listitem>
-      <listitem><simpara>JIS</simpara></listitem>
-      <listitem><simpara>ISO-8859-1</simpara></listitem>
-      <listitem><simpara>ISO-8859-2</simpara></listitem>
-      <listitem><simpara>ISO-8859-3</simpara></listitem>
-      <listitem><simpara>ISO-8859-4</simpara></listitem>
-      <listitem><simpara>ISO-8859-5</simpara></listitem>
-      <listitem><simpara>ISO-8859-6</simpara></listitem>
-      <listitem><simpara>ISO-8859-7</simpara></listitem>
-      <listitem><simpara>ISO-8859-8</simpara></listitem>
-      <listitem><simpara>ISO-8859-9</simpara></listitem>
-      <listitem><simpara>ISO-8859-10</simpara></listitem>
-      <listitem><simpara>ISO-8859-13</simpara></listitem>
-      <listitem><simpara>ISO-8859-14</simpara></listitem>
-      <listitem><simpara>ISO-8859-15</simpara></listitem>
-      <listitem><simpara>byte2be</simpara></listitem>
-      <listitem><simpara>byte2le</simpara></listitem>
-      <listitem><simpara>byte4be</simpara></listitem>
-      <listitem><simpara>byte4le</simpara></listitem>
-      <listitem><simpara>BASE64</simpara></listitem>
-      <listitem><simpara>HTML-ENTITIES</simpara></listitem>
-      <listitem><simpara>7bit</simpara></listitem>
-      <listitem><simpara>8bit</simpara></listitem>
-      <listitem><simpara>EUC-CN</simpara></listitem>
-      <listitem><simpara>CP936</simpara></listitem>
-      <listitem><simpara>HZ</simpara></listitem>
-      <listitem><simpara>EUC-TW</simpara></listitem>
-      <listitem><simpara>CP950</simpara></listitem>
-      <listitem><simpara>BIG-5</simpara></listitem>
-      <listitem><simpara>EUC-KR</simpara></listitem>
-      <listitem><simpara>UHC (CP949)</simpara></listitem>
-      <listitem><simpara>ISO-2022-KR</simpara></listitem>
-      <listitem><simpara>Windows-1251 (CP1251)</simpara></listitem>
-      <listitem><simpara>Windows-1252 (CP1252)</simpara></listitem>
-      <listitem><simpara>CP866 (IBM866)</simpara></listitem>
-      <listitem><simpara>KOI8-R</simpara></listitem>
-     </itemizedlist>
-     <para>
-      &php.ini; entry, which accepts encoding name,
-      accepts &quot;<literal>auto</literal>&quot; and
-      &quot;<literal>pass</literal>&quot; also.
-      <literal>mbstring</literal> functions, which accepts encoding
-      name, and accepts &quot;<literal>auto</literal>&quot;.
-     </para>
-     <para>
-      If &quot;<literal>pass</literal>&quot; is set, no character
-      encoding conversion is performed.
-     </para>
-     <para>
-      If &quot;<literal>auto</literal>&quot; is set, it is expanded to
-      the list of encodings defined per the <link 
linkend="mbstring.configuration">NLS</link>.
-      For instance, if the NLS is set to <literal>Japanese</literal>,
-      the value is assumed to be
-      &quot;<literal>ASCII,JIS,UTF-8,EUC-JP,SJIS</literal>&quot;.
-     </para>
-     <para>
-      See also <function>mb_detect_order</function>
-     </para>
+   <section id="mbstring.supported-encodings">
+    <title>Supported Character Encodings</title>
+    <simpara>
+     Currently the following character encodings are supported by the
+     <literal>mbstring</literal> module. Any of those Character encodings
+     can be specified in the <literal>encoding</literal> parameter of
+     <literal>mbstring</literal> functions.
+    </simpara>
+    <para>
+     The following character encodings are supported in this PHP
+     extension: 
+    </para>
+    <itemizedlist>
+     <listitem><simpara>UCS-4</simpara></listitem>
+     <listitem><simpara>UCS-4BE</simpara></listitem>
+     <listitem><simpara>UCS-4LE</simpara></listitem>
+     <listitem><simpara>UCS-2</simpara></listitem>
+     <listitem><simpara>UCS-2BE</simpara></listitem>
+     <listitem><simpara>UCS-2LE</simpara></listitem>
+     <listitem><simpara>UTF-32</simpara></listitem>
+     <listitem><simpara>UTF-32BE</simpara></listitem>
+     <listitem><simpara>UTF-32LE</simpara></listitem>
+     <listitem><simpara>UTF-16</simpara></listitem>
+     <listitem><simpara>UTF-16BE</simpara></listitem>
+     <listitem><simpara>UTF-16LE</simpara></listitem>
+     <listitem><simpara>UTF-7</simpara></listitem>
+     <listitem><simpara>UTF7-IMAP</simpara></listitem>
+     <listitem><simpara>UTF-8</simpara></listitem>
+     <listitem><simpara>ASCII</simpara></listitem>
+     <listitem><simpara>EUC-JP</simpara></listitem>
+     <listitem><simpara>SJIS</simpara></listitem>
+     <listitem><simpara>eucJP-win</simpara></listitem>
+     <listitem><simpara>SJIS-win</simpara></listitem>
+     <listitem><simpara>ISO-2022-JP</simpara></listitem>
+     <listitem><simpara>JIS</simpara></listitem>
+     <listitem><simpara>ISO-8859-1</simpara></listitem>
+     <listitem><simpara>ISO-8859-2</simpara></listitem>
+     <listitem><simpara>ISO-8859-3</simpara></listitem>
+     <listitem><simpara>ISO-8859-4</simpara></listitem>
+     <listitem><simpara>ISO-8859-5</simpara></listitem>
+     <listitem><simpara>ISO-8859-6</simpara></listitem>
+     <listitem><simpara>ISO-8859-7</simpara></listitem>
+     <listitem><simpara>ISO-8859-8</simpara></listitem>
+     <listitem><simpara>ISO-8859-9</simpara></listitem>
+     <listitem><simpara>ISO-8859-10</simpara></listitem>
+     <listitem><simpara>ISO-8859-13</simpara></listitem>
+     <listitem><simpara>ISO-8859-14</simpara></listitem>
+     <listitem><simpara>ISO-8859-15</simpara></listitem>
+     <listitem><simpara>byte2be</simpara></listitem>
+     <listitem><simpara>byte2le</simpara></listitem>
+     <listitem><simpara>byte4be</simpara></listitem>
+     <listitem><simpara>byte4le</simpara></listitem>
+     <listitem><simpara>BASE64</simpara></listitem>
+     <listitem><simpara>HTML-ENTITIES</simpara></listitem>
+     <listitem><simpara>7bit</simpara></listitem>
+     <listitem><simpara>8bit</simpara></listitem>
+     <listitem><simpara>EUC-CN</simpara></listitem>
+     <listitem><simpara>CP936</simpara></listitem>
+     <listitem><simpara>HZ</simpara></listitem>
+     <listitem><simpara>EUC-TW</simpara></listitem>
+     <listitem><simpara>CP950</simpara></listitem>
+     <listitem><simpara>BIG-5</simpara></listitem>
+     <listitem><simpara>EUC-KR</simpara></listitem>
+     <listitem><simpara>UHC (CP949)</simpara></listitem>
+     <listitem><simpara>ISO-2022-KR</simpara></listitem>
+     <listitem><simpara>Windows-1251 (CP1251)</simpara></listitem>
+     <listitem><simpara>Windows-1252 (CP1252)</simpara></listitem>
+     <listitem><simpara>CP866 (IBM866)</simpara></listitem>
+     <listitem><simpara>KOI8-R</simpara></listitem>
+    </itemizedlist>
+    <para>
+     &php.ini; entry, which accepts encoding name,
+     accepts &quot;<literal>auto</literal>&quot; and
+     &quot;<literal>pass</literal>&quot; also.
+     <literal>mbstring</literal> functions, which accepts encoding
+     name, and accepts &quot;<literal>auto</literal>&quot;.
+    </para>
+    <para>
+     If &quot;<literal>pass</literal>&quot; is set, no character
+     encoding conversion is performed.
+    </para>
+    <para>
+     If &quot;<literal>auto</literal>&quot; is set, it is expanded to
+     the list of encodings defined per the <link 
linkend="mbstring.configuration">NLS</link>.
+     For instance, if the NLS is set to <literal>Japanese</literal>,
+     the value is assumed to be
+     &quot;<literal>ASCII,JIS,UTF-8,EUC-JP,SJIS</literal>&quot;.
+    </para>
+    <para>
+     See also <function>mb_detect_order</function>
+    </para>
    </section>
     
    <section id="mbstring.overload">
-     <title>
-      Function Overloading Feature
-     </title>
-     <para>
-      You might often find it difficult to get an existing PHP application
-      work in a given multibyte environment. That's mostly because lots of
-      PHP applications out there are written with the standard
-      string functions such as <function>substr</function>, which are
-      known to not properly handle multibyte-encoded strings.
-     </para>
-     <para>
-      mbstring supports 'function overloading' feature which enables
-      you to add multibyte awareness to such an application without
-      code modification by overloading multibyte counterparts on
-      the standard string functions. For example,
-      <function>mb_substr</function> is called instead of
-      <function>substr</function> if function overloading is enabled.
-      This feature makes it easy to port applications that only support
-      single-byte encodings to a multibyte environment in many cases.
-     </para>
-     <para>
-      To use the function overloading, set
-      <literal>mbstring.func_overload</literal> in &php.ini; to a
-      positive value that represents a combination of bitmasks specifying
-      the categories of functions to be overloaded. It should be set
-      to 1 to overload the <function>mail</function> function. 2 for string
-      functions, 4 for regular expression functions. For example,
-      if is set for 7, mail, strings and regular expression functions should
-      be overloaded. The list of overloaded functions are shown below.
-      <table>
-       <title>Functions to be overloaded</title>
-       <tgroup cols="3">
-        <thead>
-         <row>
-          <entry>value of mbstring.func_overload</entry>
-          <entry>original function</entry>
-          <entry>overloaded function</entry>
-         </row>
-       </thead>
-       <tbody>
-         <row>
-          <entry>1</entry>
-          <entry><function>mail</function></entry>
-          <entry><function>mb_send_mail</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>strlen</function></entry>
-          <entry><function>mb_strlen</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>strpos</function></entry>
-          <entry><function>mb_strpos</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>strrpos</function></entry>
-          <entry><function>mb_strrpos</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>substr</function></entry>
-          <entry><function>mb_substr</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>strtolower</function></entry>
-          <entry><function>mb_strtolower</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>strtoupper</function></entry>
-          <entry><function>mb_strtoupper</function></entry>
-         </row>
-         <row>
-          <entry>2</entry>
-          <entry><function>substr_count</function></entry>
-          <entry><function>mb_substr_count</function></entry>
-         </row>
-         <row>
-          <entry>4</entry>
-          <entry><function>ereg</function></entry>
-          <entry><function>mb_ereg</function></entry>
-         </row>
-         <row>
-          <entry>4</entry>
-          <entry><function>eregi</function></entry>
-          <entry><function>mb_eregi</function></entry>
-         </row>
-         <row>
-          <entry>4</entry>
-          <entry><function>ereg_replace</function></entry>
-          <entry><function>mb_ereg_replace</function></entry>
-         </row>
-         <row>
-          <entry>4</entry>
-          <entry><function>eregi_replace</function></entry>
-          <entry><function>mb_eregi_replace</function></entry>
-         </row>
-         <row>
-          <entry>4</entry>
-          <entry><function>split</function></entry>
-          <entry><function>mb_split</function></entry>
-         </row>
-        </tbody>
-       </tgroup>
-      </table>
-     </para>
+    <title>
+     Function Overloading Feature
+    </title>
+    <para>
+     You might often find it difficult to get an existing PHP application
+     to work in a given multibyte environment. That's mostly because lots of
+     PHP applications out there are written with the standard
+     string functions such as <function>substr</function>, which are
+     known to not properly handle multibyte-encoded strings.
+    </para>
+    <para>
+     mbstring supports 'function overloading' feature which enables
+     you to add multibyte awareness to such an application without
+     code modification by overloading multibyte counterparts on
+     the standard string functions. For example,
+     <function>mb_substr</function> is called instead of
+     <function>substr</function> if function overloading is enabled.
+     This feature makes it easy to port applications that only support
+     single-byte encodings to a multibyte environment in many cases.
+    </para>
+    <para>
+     To use the function overloading, set
+     <literal>mbstring.func_overload</literal> in &php.ini; to a
+     positive value that represents a combination of bitmasks specifying
+     the categories of functions to be overloaded. It should be set
+     to 1 to overload the <function>mail</function> function. 2 for string
+     functions, 4 for regular expression functions. For example,
+     if it is set to 7, mail, strings and regular expression functions should
+     be overloaded. The list of overloaded functions is shown below.
+     <table>
+      <title>Functions to be overloaded</title>
+      <tgroup cols="3">
+       <thead>
+        <row>
+         <entry>value of mbstring.func_overload</entry>
+         <entry>original function</entry>
+         <entry>overloaded function</entry>
+        </row>
+      </thead>
+      <tbody>
+        <row>
+         <entry>1</entry>
+         <entry><function>mail</function></entry>
+         <entry><function>mb_send_mail</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>strlen</function></entry>
+         <entry><function>mb_strlen</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>strpos</function></entry>
+         <entry><function>mb_strpos</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>strrpos</function></entry>
+         <entry><function>mb_strrpos</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>substr</function></entry>
+         <entry><function>mb_substr</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>strtolower</function></entry>
+         <entry><function>mb_strtolower</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>strtoupper</function></entry>
+         <entry><function>mb_strtoupper</function></entry>
+        </row>
+        <row>
+         <entry>2</entry>
+         <entry><function>substr_count</function></entry>
+         <entry><function>mb_substr_count</function></entry>
+        </row>
+        <row>
+         <entry>4</entry>
+         <entry><function>ereg</function></entry>
+         <entry><function>mb_ereg</function></entry>
+        </row>
+        <row>
+         <entry>4</entry>
+         <entry><function>eregi</function></entry>
+         <entry><function>mb_eregi</function></entry>
+        </row>
+        <row>
+         <entry>4</entry>
+         <entry><function>ereg_replace</function></entry>
+         <entry><function>mb_ereg_replace</function></entry>
+        </row>
+        <row>
+         <entry>4</entry>
+         <entry><function>eregi_replace</function></entry>
+         <entry><function>mb_eregi_replace</function></entry>
+        </row>
+        <row>
+         <entry>4</entry>
+         <entry><function>split</function></entry>
+         <entry><function>mb_split</function></entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </table>
+    </para>
+       <note>
+        <para>
+         It is not recommended to use the function overloading option in
+         the per-directory context, because it's not confirmed yet to be
+         stable enough in a production environment and may lead to undefined
+         behaviour.
+        </para>
+       </note>
    </section>
 
    <section id="mbstring.ja-basic">
-     <title>Basics of Japanese multi-byte encodings</title>
-     <para>
-      It is often said quite hard to figure out how Japanese texts are
-      handled in the computer. This is not only because Japanese characters
-      can only be represented by multibyte encodings, but because different
-      encoding standards are adopted for different purposes / platforms.
-      Moreover, not a few character set standards are used there, which
-      are slightly different from one another. Those facts have often led
-      developers to inevitable mess-up.
-     </para>
-     <para> 
-      To create a working web application that would be put in the Japanese
-      environment, it is important to use the proper character encoding and
-      character set for the task in hand.
-     </para>
-     <para>
-      <itemizedlist>
-       <listitem>
-        <simpara>Storage for a character can be up to six bytes</simpara>
-       </listitem>
-       <listitem>
-        <simpara>
-         Most of multibyte characters often appear twice as wide as 
-         a single-byte character on display. Those characters are called
-         "zen-kaku" in Japanese which means "full width", and the other
-         (narrower) characters are called "han-kaku" - means half width.
-         However the graphical properties of the characters depend on
-         the glyphs of the type faces used to display them or print them out.
-        </simpara>
-       </listitem>
-       <listitem>
-        <simpara>
-         Some character encodings use shift(escape) sequences defined
-         in ISO2022 to switch the code map of the specific code area
-         (<literal>00h</literal> to <literal>7fh</literal>).
-        </simpara>
-       </listitem>
-       <listitem>
-        <simpara>
-         ISO-2022-JP should be used in SMTP/NNTP, and headers and entities
-         should be reencoded as per RFC requirements. Although those are not
-         requisites, it's still a good idea because several popular user
-         agents cannot recognize any other encoding methods.
-        </simpara>
-       </listitem>
-       <listitem>
-        <simpara>
-         Webpages created for mobile phone services such as
-         <ulink url="http://www.eurotechnology.com/imode/faq.html">i-mode</ulink>,
-         <ulink url="http://www.vodafone.jp/english/live/">Vodafone live!</ulink>, or 
<ulink url="http://www.au.kddi.com/english/ezweb/">ezweb</ulink>
-         are supposed to use Shift_JIS.
-        </simpara>
-       </listitem>
-      </itemizedlist>
-     </para>
+    <title>Basics of Japanese multi-byte encodings</title>
+    <para>
+     It is often said quite hard to figure out how Japanese texts are
+     handled in the computer. This is not only because Japanese characters
+     can only be represented by multibyte encodings, but because different
+     encoding standards are adopted for different purposes / platforms.
+     Moreover, not a few character set standards are used there, which
+     are slightly different from one another. Those facts have often led
+     developers to inevitable mess-up.
+    </para>
+    <para> 
+     To create a working web application that would be put in the Japanese
+     environment, it is important to use the proper character encoding and
+     character set for the task in hand.
+    </para>
+    <para>
+     <itemizedlist>
+      <listitem>
+       <simpara>Storage for a character can be up to six bytes</simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        Most of multibyte characters often appear twice as wide as 
+        a single-byte character on display. Those characters are called
+        "zen-kaku" in Japanese which means "full width", and the other
+        (narrower) characters are called "han-kaku" - means half width.
+        However the graphical properties of the characters depend on
+        the glyphs of the type faces used to display them or print them out.
+       </simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        Some character encodings use shift(escape) sequences defined
+        in ISO2022 to switch the code map of the specific code area
+        (<literal>00h</literal> to <literal>7fh</literal>).
+       </simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        ISO-2022-JP should be used in SMTP/NNTP, and headers and entities
+        should be reencoded as per RFC requirements. Although those are not
+        requisites, it's still a good idea because several popular user
+        agents cannot recognize any other encoding methods.
+       </simpara>
+      </listitem>
+      <listitem>
+       <simpara>
+        Webpages created for mobile phone services such as
+        <ulink url="http://www.nttdocomo.com/corebiz/imode/">i-mode</ulink>,
+        <ulink url="http://www.vodafone.jp/english/live/">Vodafone live!</ulink>, or 
<ulink url="http://www.au.kddi.com/english/ezweb/">EZweb</ulink>
+        are supposed to use Shift_JIS.
+       </simpara>
+      </listitem>
+     </itemizedlist>
+    </para>
    </section>
 
    <section id="mbstring.ref">
-     <title>References</title>
-     <para>
-      Multibyte character encoding schemes and the related issues are very
-      complicated. There should be too few space to cover in sufficient details.
-      Please refer to the following URLs and other resources for
-      further readings.
-      <itemizedlist>
-       <listitem>
-        <para>
-         Unicode materials
-        </para>
-        <para>
-         <ulink url="&url.unicode;">&url.unicode;</ulink>
-        </para>
-       </listitem>
-       <listitem>
-        <para>
-         Japanese/Korean/Chinese character information
-        </para>
-        <para>
-         <ulink url="ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf">
-          <literal>
-           ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf
-          </literal>
-         </ulink>
-        </para>
-       </listitem>
-      </itemizedlist>
-     </para>
+    <title>References</title>
+    <para>
+     Multibyte character encoding schemes and the related issues are very
+     complicated. There is too little space here to cover them in sufficient detail.
+     Please refer to the following URLs and other resources for
+     further readings.
+     <itemizedlist>
+      <listitem>
+       <para>
+        Unicode materials
+       </para>
+       <para>
+        <ulink url="&url.unicode;">&url.unicode;</ulink>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+        Japanese/Korean/Chinese character information
+       </para>
+       <para>
+        <ulink 
url="http://examples.oreilly.com/cjkvinfo/doc/cjk.inf">http://examples.oreilly.com/cjkvinfo/doc/cjk.inf</ulink>
+       </para>
+      </listitem>
+     </itemizedlist>
+    </para>
    </section>
+&reference.mbstring.encodings;
+
   </partintro>
 
 &reference.mbstring.functions;

http://cvs.php.net/co.php/phpdoc/en/reference/mbstring/encodings.xml?r=1.1&p=1
Index: phpdoc/en/reference/mbstring/encodings.xml
+++ phpdoc/en/reference/mbstring/encodings.xml
<?xml version="1.0" encoding="iso-8859-1"?>
<!-- $Revision: 1.1 $ -->
 <section id="mbstring.encodings">
  <title>Summaries of supported encodings</title>
  <segmentedlist>
   <title>UCS-4</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-4</seg>
    <seg>ISO 10646</seg>
    <seg>
     The Universal Character Set with 31-bit code space, standardized as UCS-4
     by ISO/IEC 10646. It is kept synchronized with the latest version of the
     Unicode code map.
    </seg>
    <seg>
     If this name is used in the encoding conversion facility, 
     the converter attempts to identify by the preceding BOM
     (byte order mark) in which endian the subsequent bytes
     are represented.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UCS-4BE</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-4</seg>
    <seg>UCS-4</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UCS-4</literal>, strings are always assumed
     to be in big endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UCS-4LE</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-4</seg>
    <seg>UCS-4</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UCS-4</literal>, strings are always assumed
     to be in little endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UCS-2</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-2</seg>
    <seg>UCS-2</seg>
    <seg>
     The Universal Character Set with 16-bit code space, standardized as UCS-2
     by ISO/IEC 10646. It is kept synchronized with the latest version of the
     Unicode code map.
    </seg>
    <seg>
     If this name is used in the encoding conversion facility, 
     the converter attempts to identify by the preceding BOM
     (byte order mark) in which endian the subsequent bytes
     are represented.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UCS-2BE</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-2</seg>
    <seg>UCS-2</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UCS-2</literal>, strings are always assumed
     to be in big endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UCS-2LE</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-10646-UCS-2</seg>
    <seg>UCS-2</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UCS-2</literal>, strings are always assumed
     to be in little endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-32</title>
   <segtitle>Name in the IANA character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-32</seg>
    <seg>Unicode</seg>
    <seg>
     Unicode Transformation Format of 32-bit unit width, whose encoding space
     refers to the Unicode's codeset standard. This encoding scheme isn't
     identical to UCS-4 because the code space of Unicode is limited to
     a 21-bit value.
    </seg>
    <seg>
     If this name is used in the encoding conversion facility,
     the converter attempts to determine from the preceding BOM
     (byte order mark) in which byte order the subsequent bytes
     are represented.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-32BE</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-32BE</seg>
    <seg>Unicode</seg>
    <seg>See above.</seg>
    <seg>
     In contrast to <literal>UTF-32</literal>, strings are always assumed
     to be in big endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-32LE</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-32LE</seg>
    <seg>Unicode</seg>
    <seg>See above.</seg>
    <seg>
     In contrast to <literal>UTF-32</literal>, strings are always assumed
     to be in little endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-16</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-16</seg>
    <seg>Unicode</seg>
    <seg>
     Unicode Transformation Format of 16-bit unit width. It is worth noting
     that UTF-16 is no longer the same specification as UCS-2, because the
     surrogate mechanism was introduced in Unicode 2.0 and
     UTF-16 now refers to a 21-bit code space.
    </seg>
    <seg>
     If this name is used in the encoding conversion facility,
     the converter attempts to determine from the preceding BOM
     (byte order mark) in which byte order the subsequent bytes
     are represented.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-16BE</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-16BE</seg>
    <seg>Unicode</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UTF-16</literal>, strings are always assumed
     to be in big endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-16LE</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-16LE</seg>
    <seg>Unicode</seg>
    <seg>
     See above.
    </seg>
    <seg>
     In contrast to <literal>UTF-16</literal>, strings are always assumed
     to be in little endian form.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-8</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-8</seg>
    <seg>Unicode / UCS</seg>
    <seg>
     Unicode Transformation Format of 8-bit unit width.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF-7</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>UTF-7</seg>
    <seg>Unicode</seg>
    <seg>
     A mail-safe transformation format of Unicode, specified in
     <ulink url="&url.rfc;2152">RFC2152</ulink>.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UTF7-IMAP</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>(none)</seg>
    <seg>Unicode</seg>
    <seg>
     A variant of UTF-7 which is specialized for use in the
     <ulink url="&url.rfc;3501">IMAP protocol</ulink>.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ASCII</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>
     US-ASCII (preferred MIME name) / iso-ir-6 / ANSI_X3.4-1986 /
     ISO_646.irv:1991 / ASCII / ISO646-US / us / IBM367 / CP367 / csASCII
    </seg>
    <seg>ASCII / ISO 646</seg>
    <seg>
     American Standard Code for Information Interchange is a commonly-used
     7-bit encoding. Also standardized as an international standard, ISO 646.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>EUC-JP</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>
     EUC-JP (preferred MIME name) /
     Extended_UNIX_Code_Packed_Format_for_Japanese / csEUCPkdFmtJapanese
    </seg>
    <seg>
     Compound of US-ASCII / JIS X0201:1997 (hankaku kana part) /
     JIS X0208:1990 / JIS X0212:1990
    </seg>
    <seg>
     The name is an abbreviation of Extended UNIX Code Packed Format for
     Japanese; as it suggests, this encoding is mostly used on UNIX or
     UNIX-like platforms. The original encoding scheme, Extended UNIX Code, is
     designed on the basis of ISO 2022.
    </seg>
    <seg>
     The character set referred to by EUC-JP is different to IBM932 / CP932,
     which are used by OS/2&reg; and Microsoft&reg; Windows&reg;.
     For information interchange with those platforms, use EUCJP-WIN instead.
    </seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>SJIS</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>Shift_JIS (preferred MIME name) / MS_Kanji / csShift_JIS</seg>
    <seg>Compound of JIS X0201:1997 / JIS X0208:1997</seg>
    <seg>
     Shift_JIS was developed in the early 1980s, at the time personal Japanese
     word processors were brought into the market, in order to maintain
     compatibility with the legacy encoding scheme JIS X 0201:1976.
     According to the IANA definition the codeset of Shift_JIS is slightly
     different to IBM932 / CP932. However, the names "SJIS" / "Shift_JIS" are
     often wrongly used to refer to these codesets.
    </seg>
    <seg>For the CP932 codemap, use SJIS-WIN instead.</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>EUCJP-WIN</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>(none)</seg>
    <seg>
     Compound of JIS X0201:1997 / JIS X0208:1997 / IBM extensions / NEC extensions
    </seg>
    <seg>
     While this &quot;encoding&quot; uses the same encoding scheme as EUC-JP,
     the underlying character set is different. That is, some code points map
     to different characters than EUC-JP.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>SJIS-win</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>Windows-31J / csWindows31J</seg>
    <seg>
     Compound of JIS X0201:1997 / JIS X0208:1997 / IBM extensions / NEC extensions
    </seg>
    <seg>
     While this &quot;encoding&quot; uses the same encoding scheme as
     Shift_JIS, the underlying character set is different. That means some code
     points map to different characters than Shift_JIS.
    </seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-2022-JP</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg>ISO-2022-JP (preferred MIME name) / csISO2022JP</seg>
    <seg>
     US-ASCII / JIS X0201:1976 / JIS X0208:1978 / JIS X0208:1983
    </seg>
    <seg><ulink url="&url.rfc;1468">RFC1468</ulink></seg>
    <seg>(none)</seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>JIS</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-1</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-2</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-3</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-4</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-5</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-6</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-7</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-8</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-9</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-10</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-13</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-14</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-8859-15</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>byte2be</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>byte2le</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>byte4be</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>byte4le</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>BASE64</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>HTML-ENTITIES</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>7bit</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>8bit</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>EUC-CN</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>CP936</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>HZ</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>EUC-TW</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>CP950</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>BIG-5</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>EUC-KR</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>UHC (CP949)</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>ISO-2022-KR</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>Windows-1251 (CP1251)</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>Windows-1252 (CP1252)</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>CP866 (IBM866)</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
  <segmentedlist>
   <title>KOI8-R</title>
   <segtitle>Name in the iana character set registry</segtitle>
   <segtitle>Underlying character set</segtitle>
   <segtitle>Description</segtitle>
   <segtitle>Additional note</segtitle>
   <seglistitem>
    <seg></seg>
    <seg></seg>
    <seg></seg>
    <seg></seg>
   </seglistitem>
  </segmentedlist>
 </section>

<!-- Keep this comment at the end of the file
Local variables:
mode: sgml
sgml-omittag:t
sgml-shorttag:t
sgml-minimize-attributes:nil
sgml-always-quote-attributes:t
sgml-indent-step:1
sgml-indent-data:t
indent-tabs-mode:nil
sgml-parent-document:nil
sgml-default-dtd-file:"../../../manual.ced"
sgml-exposed-tags:nil
sgml-local-catalogs:nil
sgml-local-ecat-files:nil
End:

vim600: syn=xml fen fdm=syntax fdl=2 si
vim: et tw=78 syn=sgml
vi: ts=1 sw=1
-->

[PHP-DOC] cvs: phpdoc /en/reference/mbstring encodings.xml reference.xml

Reply via email to