Here is an improved version of the patches.
Bookmarks in the xbel format are written using the System codepage.
Only "&" is escaped (written as "&amp;"). All other characters are
written unchanged.
Witek
commit 9ac9ee459c4ec98c6cf253db304f6b84872eae68
Author: Witold Filipczyk <[EMAIL PROTECTED]>
Date: Sat Aug 30 12:45:04 2008 +0200
Moved declaration of the struct codepage_desc to the header file.
diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index de853b9..03f7f2b 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -29,43 +29,6 @@
#include "util/memory.h"
#include "util/string.h"
-
-/* Fix namespace clash on MacOS. */
-#define table table_elinks
-
-struct table_entry {
- unsigned char c;
- /* This should in principle be unicode_val_T, but because all
- * the values currently in codepage.inc fit in 16 bits, we can
- * as well use uint16_t and halve sizeof(struct table_entry)
- * from 8 bytes to 4. Should other characters ever be needed,
- * unicode_val_T u : 24 might be a possibility, although it
- * seems a little unportable as bitfields are in principle
- * restricted to int, which may be 16-bit. */
- uint16_t u;
-};
-
-struct codepage_desc {
- unsigned char *name;
- unsigned char *const *aliases;
-
- /* The Unicode mappings of codepage bytes 0x80...0xFF.
- * (0x00...0x7F are assumed to be ASCII in all codepages.)
- * Because all current values fit in 16 bits, we store them as
- * uint16_t rather than unicode_val_T. If the codepage does
- * not use some byte, then @highhalf maps that byte to 0xFFFF,
- * which C code converts to UCS_REPLACEMENT_CHARACTER where
- * appropriate. (U+FFFF is reserved and will never be
- * assigned as a character.) */
- const uint16_t *highhalf;
-
- /* If some byte in the codepage corresponds to multiple Unicode
- * characters, then the preferred character is in @highhalf
- * above, and the rest are listed here in @table. This table
- * is not used for translating from the codepage to Unicode. */
- const struct table_entry *table;
-};
-
#include "intl/codepage.inc"
#include "intl/uni_7b.inc"
#include "intl/entity.inc"
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index d87e2ee..5d77833 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -74,6 +74,44 @@ struct conv_table {
} u;
};
+/* Fix namespace clash on MacOS. */
+#define table table_elinks
+
+struct table_entry {
+ unsigned char c;
+ /* This should in principle be unicode_val_T, but because all
+ * the values currently in codepage.inc fit in 16 bits, we can
+ * as well use uint16_t and halve sizeof(struct table_entry)
+ * from 8 bytes to 4. Should other characters ever be needed,
+ * unicode_val_T u : 24 might be a possibility, although it
+ * seems a little unportable as bitfields are in principle
+ * restricted to int, which may be 16-bit. */
+ uint16_t u;
+};
+
+struct codepage_desc {
+ unsigned char *name;
+ unsigned char *const *aliases;
+
+ /* The Unicode mappings of codepage bytes 0x80...0xFF.
+ * (0x00...0x7F are assumed to be ASCII in all codepages.)
+ * Because all current values fit in 16 bits, we store them as
+ * uint16_t rather than unicode_val_T. If the codepage does
+ * not use some byte, then @highhalf maps that byte to 0xFFFF,
+ * which C code converts to UCS_REPLACEMENT_CHARACTER where
+ * appropriate. (U+FFFF is reserved and will never be
+ * assigned as a character.) */
+ const uint16_t *highhalf;
+
+ /* If some byte in the codepage corresponds to multiple Unicode
+ * characters, then the preferred character is in @highhalf
+ * above, and the rest are listed here in @table. This table
+ * is not used for translating from the codepage to Unicode. */
+ const struct table_entry *table;
+};
+
+extern const struct codepage_desc codepages[];
+
enum convert_string_mode {
CSM_DEFAULT, /* Convert any char. */
CSM_QUERY, /* Special handling of '&' and '=' chars. */
commit 3bcfa83ad82064f363e8e22eeb3a4ac12e415bd1
Author: Witold Filipczyk <[EMAIL PROTECTED]>
Date: Sat Aug 30 14:22:01 2008 +0200
Support national characters in xbel. The bookmarks.xbel is written
using the system codepage.
diff --git a/src/bookmarks/backend/xbel.c b/src/bookmarks/backend/xbel.c
index 432d3ba..f3fbff3 100644
--- a/src/bookmarks/backend/xbel.c
+++ b/src/bookmarks/backend/xbel.c
@@ -55,8 +55,8 @@ static unsigned char *get_attribute_value(struct tree_node *node,
static void read_bookmarks_xbel(FILE *f);
static unsigned char * filename_bookmarks_xbel(int writing);
-static int xbeltree_to_bookmarks_list(struct tree_node *root,
- struct bookmark *current_parent);
+static int xbeltree_to_bookmarks_list(struct conv_table *tab,
+ struct tree_node *root, struct bookmark *current_parent);
static void write_bookmarks_list(struct secure_save_info *ssi,
LIST_OF(struct bookmark) *bookmarks_list,
int n, int folder_state);
@@ -82,6 +82,25 @@ static struct tree_node *current_node = NULL;
* different format. */
static int readok = 1;
+static int XMLCALL
+unknown_encoding(void *data, const char *name, XML_Encoding *info)
+{
+ int index = get_cp_index(name);
+ int i;
+
+ if (index < 0) return XML_STATUS_ERROR;
+ /* FIXME: SYSTEM_CHARSET_FLAG should be in intl/charsets.h. */
+ index &= ~128;
+ for (i = 0; i < 128; i++) {
+ info->map[i] = i;
+ }
+ for (; i < 256; i++) {
+ info->map[i] = codepages[index].highhalf[i - 128];
+ }
+ return XML_STATUS_OK;
+}
+
+
static void
read_bookmarks_xbel(FILE *f)
{
@@ -100,6 +119,7 @@ read_bookmarks_xbel(FILE *f)
XML_SetElementHandler(p, on_element_open, on_element_close);
XML_SetCharacterDataHandler(p, on_text);
+ XML_SetUnknownEncodingHandler(p, unknown_encoding, NULL);
while (!done && !err) {
size_t len = fread(in_buffer, 1, BUFSIZ, f);
@@ -125,7 +145,13 @@ read_bookmarks_xbel(FILE *f)
}
}
- if (!err) readok = xbeltree_to_bookmarks_list(root_node->children, NULL); /* Top node is xbel */
+ if (!err) {
+ int cp = get_cp_index("System") & ~128;
+ int utf8 = get_cp_index("utf-8");
+ struct conv_table *tab = get_translation_table(utf8, cp);
+
+ readok = xbeltree_to_bookmarks_list(tab, root_node->children, NULL); /* Top node is xbel */
+ }
XML_ParserFree(p);
free_xbeltree(root_node);
@@ -139,13 +165,13 @@ write_bookmarks_xbel(struct secure_save_info *ssi,
int folder_state = get_opt_bool("bookmarks.folder_state", NULL);
/* We check for readok in filename_bookmarks_xbel(). */
- secure_fputs(ssi,
- "<?xml version=\"1.0\"?>\n"
+ secure_fprintf(ssi,
+ "<?xml version=\"1.0\" encoding=\"%s\"?>\n"
"<!DOCTYPE xbel PUBLIC \"+//IDN python.org//DTD XML "
"Bookmark Exchange Language 1.0//EN//XML\"\n"
" "
"\"http://www.python.org/topics/xml/dtds/xbel-1.0.dtd\">\n\n"
- "<xbel>\n\n\n");
+ "<xbel>\n\n\n", get_cp_mime_name(get_cp_index("System")));
write_bookmarks_list(ssi, bookmarks_list, 0, folder_state);
@@ -168,6 +194,15 @@ indentation(struct secure_save_info *ssi, int num)
secure_fputs(ssi, " ");
}
+static void
+ampersand(struct secure_save_info *ssi, const unsigned char *str)
+{
+ for (; *str; str++) {
+ if (*str != '&') secure_fputc(ssi, *str);
+ else secure_fputs(ssi, "&amp;");
+ }
+}
+#if 0
/* FIXME This is totally broken, we should use the Unicode value in
* numeric entities.
* Additionally it is slow, not elegant, incomplete and
@@ -205,6 +240,7 @@ print_xml_entities(struct secure_save_info *ssi, const unsigned char *str)
#undef accept_char
}
+#endif
static void
write_bookmarks_list(struct secure_save_info *ssi,
@@ -225,7 +261,7 @@ write_bookmarks_list(struct secure_save_info *ssi,
indentation(ssi, n + 2);
secure_fputs(ssi, "<title>");
- print_xml_entities(ssi, bm->title);
+ ampersand(ssi, bm->title);
secure_fputs(ssi, "</title>\n");
if (!list_empty(bm->child))
@@ -237,12 +273,12 @@ write_bookmarks_list(struct secure_save_info *ssi,
} else if (bm->box_item->type == BI_LEAF) {
secure_fputs(ssi, "<bookmark href=\"");
- print_xml_entities(ssi, bm->url);
+ ampersand(ssi, bm->url);
secure_fputs(ssi, "\">\n");
indentation(ssi, n + 2);
secure_fputs(ssi, "<title>");
- print_xml_entities(ssi, bm->title);
+ ampersand(ssi, bm->title);
secure_fputs(ssi, "</title>\n");
indentation(ssi, n + 1);
@@ -370,7 +406,7 @@ on_text(void *data, const XML_Char *text, int len)
/* xbel_tree_to_bookmarks_list: returns 0 on fail,
* 1 on success */
static int
-xbeltree_to_bookmarks_list(struct tree_node *node,
+xbeltree_to_bookmarks_list(struct conv_table *tab, struct tree_node *node,
struct bookmark *current_parent)
{
struct bookmark *tmp;
@@ -379,14 +415,22 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
while (node) {
if (!strcmp(node->name, "bookmark")) {
- unsigned char *href;
+ unsigned char *href, *text = NULL;
title = get_child(node, "title");
href = get_attribute_value(node, "href");
-
+ if (href) href = convert_string(tab, href,
+ strlen(href), 0, CSM_NONE,
+ NULL, NULL, NULL);
+
+ if (title && title->text) {
+ text = convert_string(tab, title->text,
+ strlen(title->text), 0, CSM_NONE,
+ NULL, NULL, NULL);
+ }
tmp = add_bookmark(current_parent, 0,
/* The <title> element is optional */
- title && title->text ? title->text
+ text ? text
: (unsigned char *)
gettext("No title"),
/* XXX: The href attribute isn't optional but
* we don't validate the source XML yet, so
@@ -395,6 +439,9 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
href ? href
: (unsigned char *) gettext("No URL"));
+ mem_free_if(text);
+ mem_free_if(href);
+
/* Out of memory */
if (!tmp) return 0;
@@ -402,14 +449,20 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
lastbm = tmp;
} else if (!strcmp(node->name, "folder")) {
- unsigned char *folded;
+ unsigned char *folded, *text = NULL;
title = get_child(node, "title");
+ if (title && title->text) {
+ text = convert_string(tab, title->text,
+ strlen(title->text), 0, CSM_NONE,
+ NULL, NULL, NULL);
+ }
tmp = add_bookmark(current_parent, 0,
- title && title->text ? title->text
+ text ? text
: (unsigned char *)
gettext("No title"),
NULL);
+ mem_free_if(text);
/* Out of memory */
if (!tmp) return 0;
@@ -435,9 +488,9 @@ xbeltree_to_bookmarks_list(struct tree_node *node,
/* If this node is a <folder> element, current parent
* changes */
ret = (!strcmp(node->name, "folder") ?
- xbeltree_to_bookmarks_list(node->children,
+ xbeltree_to_bookmarks_list(tab, node->children,
lastbm) :
- xbeltree_to_bookmarks_list(node->children,
+ xbeltree_to_bookmarks_list(tab, node->children,
current_parent));
/* Out of memory */
if (!ret) return 0;
diff --git a/src/bookmarks/bookmarks.c b/src/bookmarks/bookmarks.c
index 7f6ebf4..45c945b 100644
--- a/src/bookmarks/bookmarks.c
+++ b/src/bookmarks/bookmarks.c
@@ -54,13 +54,13 @@ static struct option_info bookmark_options_info[] = {
"file_format", 0, 0, 1, 0,
N_("File format for bookmarks (affects both reading and saving):\n"
"0 is the default native ELinks format\n"
- "1 is XBEL universal XML bookmarks format (ELinks bug 153: NO NATIONAL CHARS SUPPORT!)")),
+ "1 is XBEL universal XML bookmarks format")),
#else
INIT_OPT_INT("bookmarks", N_("File format"),
"file_format", 0, 0, 1, 0,
N_("File format for bookmarks (affects both reading and saving):\n"
"0 is the default native ELinks format\n"
- "1 is XBEL universal XML bookmarks format (ELinks bug 153: NO NATIONAL CHARS SUPPORT!)"
+ "1 is XBEL universal XML bookmarks format"
" (DISABLED)")),
#endif
_______________________________________________
elinks-dev mailing list
[email protected]
http://linuxfromscratch.org/mailman/listinfo/elinks-dev