On 11.07.21 20:00, Gustaf Neumann wrote:

When I look at the IANA page, I see several entries there that are not in the NaviServer default table. So one should check what is feasible to add.

Dear all,

I've worked through the IANA charset definitions [1], especially the "Preferred MIME Name" and "Name" fields, and brought the default charset definitions of NaviServer up to date. In addition, I have added the charset names which are identical in the IANA definitions and in Tcl to the list returned by [ns_charset], to reduce complexity and potential confusion. It would be good if someone working with many charsets could have a second look at the list. The changes are on Bitbucket.

all the best

-g

[1] https://www.iana.org/assignments/character-sets/character-sets.xml



index f9fd791b..836338a7 100644
--- a/nsd/encoding.c
+++ b/nsd/encoding.c
@@ -90,9 +90,30 @@ static const struct {
     const char  *charset;
     const char  *name;
 } builtinChar[] = {
+    { "gb18030",             "cp936" },
+    { "gb_2312-80",          "gb2312" },
+    { "ibm437",              "cp437" },
+    { "ibm775",              "cp775" },
+    { "ibm850",              "cp850" },
+    { "ibm852",              "cp852" },
+    { "ibm855",              "cp855" },
+    { "ibm857",              "cp857" },
+    { "ibm860",              "cp860" },
+    { "ibm861",              "cp861" },
+    { "ibm862",              "cp862" },
+    { "ibm863",              "cp863" },
+    { "ibm864",              "cp864" },
+    { "ibm865",              "cp865" },
+    { "ibm866",              "cp866" },
+    { "ibm869",              "cp869" },
     { "iso-2022-jp",         "iso2022-jp" },
     { "iso-2022-kr",         "iso2022-kr" },
     { "iso-8859-1",          "iso8859-1" },
+    { "iso-8859-10",         "iso8859-10" },
+    { "iso-8859-13",         "iso8859-13" },
+    { "iso-8859-14",         "iso8859-14" },
+    { "iso-8859-15",         "iso8859-15" },
+    { "iso-8859-16",         "iso8859-16" },
     { "iso-8859-2",          "iso8859-2" },
     { "iso-8859-3",          "iso8859-3" },
     { "iso-8859-4",          "iso8859-4" },
@@ -101,6 +122,8 @@ static const struct {
     { "iso-8859-7",          "iso8859-7" },
     { "iso-8859-8",          "iso8859-8" },
     { "iso-8859-9",          "iso8859-9" },
+    { "jis_x0201",           "jis0201" },
+    { "jis_x0212-1990",      "jis0212" },
     { "korean",              "ksc5601" },
     { "ksc_5601",            "ksc5601" },
     { "mac",                 "macRoman" },
@@ -140,6 +163,8 @@ static const struct {
     { "windows-1256",        "cp1256" },
     { "windows-1257",        "cp1257" },
     { "windows-1258",        "cp1258" },
+    { "windows-31j",         "cp932" },
+    { "windows-874",         "cp874" },
     { "x-mac",               "macRoman" },
     { "x-mac-centeuro",      "macCentEuro" },
     { "x-mac-centraleupore", "macCentEuro" },
@@ -154,6 +179,24 @@ static const struct {
     { "x-mac-turkish",       "macTurkish" },
     { "x-mac-ukraine",       "macUkraine" },
     { "x-macintosh",         "macRoman" },
+
+    /*
+     * The following entries are strictly speaking not needed, since the
+     * IANA name is identical with the Tcl charset name. We add these to
+     * be able to return the full set of supported IANA charsets via
+     * [ns_charset].
+     *
+     * See: https://www.iana.org/assignments/character-sets/character-sets.xml
+     */
+    { "big5",                "big5" },
+    { "euc-jp",              "euc-jp" },
+    { "euc-kr",              "euc-kr" },
+    { "gb2312",              "gb2312" },
+    { "koi8-r",              "koi8-r" },
+    { "koi8-u",              "koi8-u" },
+    { "tis-620",             "tis-620" },
+    { "utf-8",               "utf-8" },
+
     { NULL, NULL }
 };
_______________________________________________
naviserver-devel mailing list
naviserver-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/naviserver-devel

Reply via email to