Here is a better set of patches this time.
Bookmarks in the XBEL format are now written using the System codepage.
Only "&" is escaped, and is written as "&amp;". All other characters
are written unchanged.

Witek
commit 9ac9ee459c4ec98c6cf253db304f6b84872eae68
Author: Witold Filipczyk <[EMAIL PROTECTED]>
Date:   Sat Aug 30 12:45:04 2008 +0200

    Moved declaration of the struct codepage_desc to the header file.

diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index de853b9..03f7f2b 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -29,43 +29,6 @@
 #include "util/memory.h"
 #include "util/string.h"
 
-
-/* Fix namespace clash on MacOS. */
-#define table table_elinks
-
-struct table_entry {
-       unsigned char c;
-       /* This should in principle be unicode_val_T, but because all
-        * the values currently in codepage.inc fit in 16 bits, we can
-        * as well use uint16_t and halve sizeof(struct table_entry)
-        * from 8 bytes to 4.  Should other characters ever be needed,
-        * unicode_val_T u : 24 might be a possibility, although it
-        * seems a little unportable as bitfields are in principle
-        * restricted to int, which may be 16-bit.  */
-       uint16_t u;
-};
-
-struct codepage_desc {
-       unsigned char *name;
-       unsigned char *const *aliases;
-
-       /* The Unicode mappings of codepage bytes 0x80...0xFF.
-        * (0x00...0x7F are assumed to be ASCII in all codepages.)
-        * Because all current values fit in 16 bits, we store them as
-        * uint16_t rather than unicode_val_T.  If the codepage does
-        * not use some byte, then @highhalf maps that byte to 0xFFFF,
-        * which C code converts to UCS_REPLACEMENT_CHARACTER where
-        * appropriate.  (U+FFFF is reserved and will never be
-        * assigned as a character.)  */
-       const uint16_t *highhalf;
-
-       /* If some byte in the codepage corresponds to multiple Unicode
-        * characters, then the preferred character is in @highhalf
-        * above, and the rest are listed here in @table.  This table
-        * is not used for translating from the codepage to Unicode.  */
-       const struct table_entry *table;
-};
-
 #include "intl/codepage.inc"
 #include "intl/uni_7b.inc"
 #include "intl/entity.inc"
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index d87e2ee..5d77833 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -74,6 +74,44 @@ struct conv_table {
        } u;
 };
 
+/* Fix namespace clash on MacOS. */
+#define table table_elinks
+
+struct table_entry {
+       unsigned char c;
+       /* This should in principle be unicode_val_T, but because all
+        * the values currently in codepage.inc fit in 16 bits, we can
+        * as well use uint16_t and halve sizeof(struct table_entry)
+        * from 8 bytes to 4.  Should other characters ever be needed,
+        * unicode_val_T u : 24 might be a possibility, although it
+        * seems a little unportable as bitfields are in principle
+        * restricted to int, which may be 16-bit.  */
+       uint16_t u;
+};
+
+struct codepage_desc {
+       unsigned char *name;
+       unsigned char *const *aliases;
+
+       /* The Unicode mappings of codepage bytes 0x80...0xFF.
+        * (0x00...0x7F are assumed to be ASCII in all codepages.)
+        * Because all current values fit in 16 bits, we store them as
+        * uint16_t rather than unicode_val_T.  If the codepage does
+        * not use some byte, then @highhalf maps that byte to 0xFFFF,
+        * which C code converts to UCS_REPLACEMENT_CHARACTER where
+        * appropriate.  (U+FFFF is reserved and will never be
+        * assigned as a character.)  */
+       const uint16_t *highhalf;
+
+       /* If some byte in the codepage corresponds to multiple Unicode
+        * characters, then the preferred character is in @highhalf
+        * above, and the rest are listed here in @table.  This table
+        * is not used for translating from the codepage to Unicode.  */
+       const struct table_entry *table;
+};
+
+extern const struct codepage_desc codepages[];
+
 enum convert_string_mode {
        CSM_DEFAULT, /* Convert any char. */
        CSM_QUERY, /* Special handling of '&' and '=' chars. */
commit 3bcfa83ad82064f363e8e22eeb3a4ac12e415bd1
Author: Witold Filipczyk <[EMAIL PROTECTED]>
Date:   Sat Aug 30 14:22:01 2008 +0200

    Support national characters in xbel. The bookmarks.xbel is written
    using the system codepage.

diff --git a/src/bookmarks/backend/xbel.c b/src/bookmarks/backend/xbel.c
index 432d3ba..f3fbff3 100644
--- a/src/bookmarks/backend/xbel.c
+++ b/src/bookmarks/backend/xbel.c
@@ -55,8 +55,8 @@ static unsigned char *get_attribute_value(struct tree_node *node,
 
 static void read_bookmarks_xbel(FILE *f);
 static unsigned char * filename_bookmarks_xbel(int writing);
-static int xbeltree_to_bookmarks_list(struct tree_node *root,
-                                     struct bookmark *current_parent);
+static int xbeltree_to_bookmarks_list(struct conv_table *tab,
+       struct tree_node *root, struct bookmark *current_parent);
 static void write_bookmarks_list(struct secure_save_info *ssi,
                                 LIST_OF(struct bookmark) *bookmarks_list,
                                 int n, int folder_state);
@@ -82,6 +82,25 @@ static struct tree_node *current_node = NULL;
  * different format. */
 static int readok = 1;
 
+static int XMLCALL
+unknown_encoding(void *data, const char *name, XML_Encoding *info)
+{
+       int index = get_cp_index(name);
+       int i;
+
+       if (index < 0) return XML_STATUS_ERROR;
+       /* FIXME: SYSTEM_CHARSET_FLAG should be in intl/charsets.h. */
+       index &= ~128;
+       for (i = 0; i < 128; i++) {
+               info->map[i] = i;
+       }
+       for (; i < 256; i++) {
+               info->map[i] = codepages[index].highhalf[i - 128];
+       }
+       return XML_STATUS_OK;
+}
+
+
 static void
 read_bookmarks_xbel(FILE *f)
 {
@@ -100,6 +119,7 @@ read_bookmarks_xbel(FILE *f)
 
        XML_SetElementHandler(p, on_element_open, on_element_close);
        XML_SetCharacterDataHandler(p, on_text);
+       XML_SetUnknownEncodingHandler(p, unknown_encoding, NULL);
 
        while (!done && !err) {
                size_t len = fread(in_buffer, 1, BUFSIZ, f);
@@ -125,7 +145,13 @@ read_bookmarks_xbel(FILE *f)
                }
        }
 
-       if (!err) readok = xbeltree_to_bookmarks_list(root_node->children, NULL); /* Top node is xbel */
+       if (!err) {
+               int cp = get_cp_index("System") & ~128;
+               int utf8 = get_cp_index("utf-8");
+               struct conv_table *tab = get_translation_table(utf8, cp);
+
+               readok = xbeltree_to_bookmarks_list(tab, root_node->children, NULL); /* Top node is xbel */
+       }
 
        XML_ParserFree(p);
        free_xbeltree(root_node);
@@ -139,13 +165,13 @@ write_bookmarks_xbel(struct secure_save_info *ssi,
        int folder_state = get_opt_bool("bookmarks.folder_state", NULL);
        /* We check for readok in filename_bookmarks_xbel(). */
 
-       secure_fputs(ssi,
-               "<?xml version=\"1.0\"?>\n"
+       secure_fprintf(ssi,
+               "<?xml version=\"1.0\" encoding=\"%s\"?>\n"
                "<!DOCTYPE xbel PUBLIC \"+//IDN python.org//DTD XML "
                "Bookmark Exchange Language 1.0//EN//XML\"\n"
                "                      "
                "\"http://www.python.org/topics/xml/dtds/xbel-1.0.dtd\">\n\n"
-               "<xbel>\n\n\n");
+               "<xbel>\n\n\n", get_cp_mime_name(get_cp_index("System")));
 
 
        write_bookmarks_list(ssi, bookmarks_list, 0, folder_state);
@@ -168,6 +194,15 @@ indentation(struct secure_save_info *ssi, int num)
                secure_fputs(ssi, "    ");
 }
 
+static void
+ampersand(struct secure_save_info *ssi, const unsigned char *str)
+{
+       for (; *str; str++) {
+               if (*str != '&') secure_fputc(ssi, *str);
+               else secure_fputs(ssi, "&amp;");
+       }
+}
+#if 0
 /* FIXME This is totally broken, we should use the Unicode value in
  *       numeric entities.
  *       Additionally it is slow, not elegant, incomplete and
@@ -205,6 +240,7 @@ print_xml_entities(struct secure_save_info *ssi, const unsigned char *str)
 #undef accept_char
 
 }
+#endif
 
 static void
 write_bookmarks_list(struct secure_save_info *ssi,
@@ -225,7 +261,7 @@ write_bookmarks_list(struct secure_save_info *ssi,
 
                        indentation(ssi, n + 2);
                        secure_fputs(ssi, "<title>");
-                       print_xml_entities(ssi, bm->title);
+                       ampersand(ssi, bm->title);
                        secure_fputs(ssi, "</title>\n");
 
                        if (!list_empty(bm->child))
@@ -237,12 +273,12 @@ write_bookmarks_list(struct secure_save_info *ssi,
                } else if (bm->box_item->type == BI_LEAF) {
 
                        secure_fputs(ssi, "<bookmark href=\"");
-                       print_xml_entities(ssi, bm->url);
+                       ampersand(ssi, bm->url);
                        secure_fputs(ssi, "\">\n");
 
                        indentation(ssi, n + 2);
                        secure_fputs(ssi, "<title>");
-                       print_xml_entities(ssi, bm->title);
+                       ampersand(ssi, bm->title);
                        secure_fputs(ssi, "</title>\n");
 
                        indentation(ssi, n + 1);
@@ -370,7 +406,7 @@ on_text(void *data, const XML_Char *text, int len)
 /* xbel_tree_to_bookmarks_list: returns 0 on fail,
  *                                   1 on success */
 static int
-xbeltree_to_bookmarks_list(struct tree_node *node,
+xbeltree_to_bookmarks_list(struct conv_table *tab, struct tree_node *node,
                           struct bookmark *current_parent)
 {
        struct bookmark *tmp;
@@ -379,14 +415,22 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
 
        while (node) {
                if (!strcmp(node->name, "bookmark")) {
-                       unsigned char *href;
+                       unsigned char *href, *text = NULL;
 
                        title = get_child(node, "title");
                        href = get_attribute_value(node, "href");
-
+                       if (href) href = convert_string(tab, href,
+                                       strlen(href), 0, CSM_NONE,
+                                       NULL, NULL, NULL);
+
+                       if (title && title->text) {
+                               text = convert_string(tab, title->text,
+                                       strlen(title->text), 0, CSM_NONE,
+                                       NULL, NULL, NULL);
+                       }
                        tmp = add_bookmark(current_parent, 0,
                                           /* The <title> element is optional */
-                                          title && title->text ? title->text
+                                          text ? text
                                                : (unsigned char *) gettext("No title"),
                                           /* XXX: The href attribute isn't optional but
                                            * we don't validate the source XML yet, so
@@ -395,6 +439,9 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
                                           href ? href
                                               : (unsigned char *) gettext("No URL"));
 
+                       mem_free_if(text);
+                       mem_free_if(href);
+
                        /* Out of memory */
                        if (!tmp) return 0;
 
@@ -402,14 +449,20 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
                        lastbm = tmp;
 
                } else if (!strcmp(node->name, "folder")) {
-                       unsigned char *folded;
+                       unsigned char *folded, *text = NULL;
 
                        title = get_child(node, "title");
+                       if (title && title->text) {
+                               text = convert_string(tab, title->text,
+                                       strlen(title->text), 0, CSM_NONE,
+                                       NULL, NULL, NULL);
+                       }
 
                        tmp = add_bookmark(current_parent, 0,
-                                          title && title->text ? title->text
+                                          text ? text
                                                : (unsigned char *) gettext("No title"),
                                           NULL);
+                       mem_free_if(text);
 
                        /* Out of memory */
                        if (!tmp) return 0;
@@ -435,9 +488,9 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
                        /* If this node is a <folder> element, current parent
                         * changes */
                        ret = (!strcmp(node->name, "folder") ?
-                               xbeltree_to_bookmarks_list(node->children,
+                               xbeltree_to_bookmarks_list(tab, node->children,
                                                           lastbm) :
-                               xbeltree_to_bookmarks_list(node->children,
+                               xbeltree_to_bookmarks_list(tab, node->children,
                                                           current_parent));
                        /* Out of memory */
                        if (!ret) return 0;
diff --git a/src/bookmarks/bookmarks.c b/src/bookmarks/bookmarks.c
index 7f6ebf4..45c945b 100644
--- a/src/bookmarks/bookmarks.c
+++ b/src/bookmarks/bookmarks.c
@@ -54,13 +54,13 @@ static struct option_info bookmark_options_info[] = {
                "file_format", 0, 0, 1, 0,
                N_("File format for bookmarks (affects both reading and saving):\n"
                "0 is the default native ELinks format\n"
-               "1 is XBEL universal XML bookmarks format (ELinks bug 153: NO NATIONAL CHARS SUPPORT!)")),
+               "1 is XBEL universal XML bookmarks format")),
 #else
        INIT_OPT_INT("bookmarks", N_("File format"),
                "file_format", 0, 0, 1, 0,
                N_("File format for bookmarks (affects both reading and saving):\n"
                "0 is the default native ELinks format\n"
-               "1 is XBEL universal XML bookmarks format (ELinks bug 153: NO NATIONAL CHARS SUPPORT!)"
+               "1 is XBEL universal XML bookmarks format"
                "  (DISABLED)")),
 #endif
 
_______________________________________________
elinks-dev mailing list
[email protected]
http://linuxfromscratch.org/mailman/listinfo/elinks-dev

Reply via email to