[elinks-dev] [patch 3/5] #ifdef CONFIG_UTF_8, translate all terminal input via UCS-4.

Kalle Olavi Niemitalo Thu, 03 Aug 2006 16:24:02 -0700

#ifdef CONFIG_UTF_8, translate all terminal input via UCS-4.

---
commit f630e1e0081ffc6d0974f43868e5ae3d1c076847
tree 7428ac527293a5cd0efe2ee188fc2d7c2e0622b8
parent d25b037cf7e09aff4b0051804355e48d4fcc7460
author Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Thu, 03 Aug 2006 23:55:05 +0300
committer Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Thu, 03 Aug 2006 23:55:05 +0300


 src/intl/charsets.c  |   38 ++++++++++++++++++++++++++++------
 src/intl/charsets.h  |    1 +
 src/terminal/event.c |   56 +++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index 33bc507..912ea09 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string,
 }
 #endif /* CONFIG_UTF_8 */
 
+/* Slow algorithm, the common part of cp2u and cp2utf_8.  */
+static unicode_val_T
+cp2u_shared(const struct codepage_desc *from, unsigned char c)
+{
+	int j;
+
+	for (j = 0; from->table[j].c; j++)
+		if (from->table[j].c == c)
+			return from->table[j].u;
+
+	return UCS_NO_CHAR;
+}
+
+#ifdef CONFIG_UTF_8
+/* Slow algorithm, used for converting input from the terminal.  */
+unicode_val_T
+cp2u(int from, unsigned char c)
+{
+	from &= ~SYSTEM_CHARSET_FLAG;
+
+	/* UTF-8 is a multibyte codepage and cannot be handled with
+	 * this function.  */
+	assert(codepages[from].table != table_utf_8);
+	if_assert_failed return UCS_NO_CHAR;
+
+	if (c < 0x80) return c;
+	else return cp2u_shared(&codepages[from], c);
+}
+#endif	/* CONFIG_UTF_8 */
+
 /* This slow and ugly code is used by the terminal utf_8_io */
 unsigned char *
 cp2utf_8(int from, int c)
 {
-	int j;
-
 	from &= ~SYSTEM_CHARSET_FLAG;
 
 	if (codepages[from].table == table_utf_8 || c < 128)
 		return strings[c];
 
-	for (j = 0; codepages[from].table[j].c; j++)
-		if (codepages[from].table[j].c == c)
-			return encode_utf_8(codepages[from].table[j].u);
-
-	return encode_utf_8(UCS_NO_CHAR);
+	return encode_utf_8(cp2u_shared(&codepages[from], c));
 }
 
 static void
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 246606b..8d11707 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, in
 inline int unicode_to_cell(unicode_val_T);
 inline int strlen_utf8(unsigned char **);
 inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
+unicode_val_T cp2u(int, unsigned char);
 #endif /* CONFIG_UTF_8 */
 
 unsigned char *cp2utf_8(int, int);
diff --git a/src/terminal/event.c b/src/terminal/event.c
index 5943aea..2937998 100644
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@@ -133,16 +133,17 @@ term_send_event(struct terminal *term, s
 }
 
 static void
-term_send_ucs(struct terminal *term, struct term_event *ev, unicode_val_T u)
+term_send_ucs(struct terminal *term, unicode_val_T u, int modifier)
 {
 	unsigned char *recoded;
+	struct term_event ev;
 
+	set_kbd_term_event(&ev, KBD_UNDEF, modifier);
 	recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset"));
 	if (!recoded) recoded = "*";
 	while (*recoded) {
-		ev->info.keyboard.modifier = term->interlink->utf_8.modifier;
-		ev->info.keyboard.key = *recoded;
-		term_send_event(term, ev);
+		ev.info.keyboard.key = *recoded;
+		term_send_event(term, &ev);
 		recoded++;
 	}
 }
@@ -267,14 +268,14 @@ #endif
 	{
 		int utf8_io = -1;
 		int key = ilev->info.keyboard.key;
+		int modifier = ilev->info.keyboard.modifier;
 
 		if (key >= 0x100)
 			key = -key;
-		set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier);
 
 		reset_timer();
 
-		if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && toupper(key) == 'L') {
+		if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) {
 			redraw_terminal_cls(term);
 			break;
 
@@ -283,14 +284,33 @@ #endif
 			return 0;
 		}
 
+		/* Character Conversions.  */
 #ifdef CONFIG_UTF_8
-		utf8_io = !!term->utf8;
+		/* struct term_event_keyboard carries UCS-4.
+		 * - If the "utf_8_io" option (i.e. term->utf8) is
+		 *   true or the "charset" option refers to UTF-8,
+		 *   then handle_interlink_event() converts from UTF-8
+		 *   to UCS-4.
+		 * - Otherwise, handle_interlink_event() converts from
+		 *   the codepage specified with the "charset" option
+		 *   to UCS-4.  */
+		utf8_io = term->utf8
+			|| is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
 #else
+		/* struct term_event_keyboard carries bytes in the
+		 * charset of the terminal.
+		 * - If the "utf_8_io" option is true, then
+		 *   handle_interlink_event() converts from UTF-8 to
+		 *   UCS-4, and term_send_ucs() converts from UCS-4 to
+		 *   the codepage specified with the "charset" option;
+		 *   this codepage cannot be UTF-8.
+		 * - Otherwise, handle_interlink_event() passes the
+		 *   bytes straight through.  */
 		utf8_io = get_opt_bool_tree(term->spec, "utf_8_io");
 #endif /* CONFIG_UTF_8 */
 
 		if (interlink->utf_8.len) {
-			if ((key & 0xC0) == 0x80 && utf8_io) {
+			if (key >= 0x80 && key <= 0xBF && utf8_io) {
 				interlink->utf_8.ucs <<= 6;
 				interlink->utf_8.ucs |= key & 0x3F;
 				if (! --interlink->utf_8.len) {
@@ -298,17 +318,29 @@ #endif /* CONFIG_UTF_8 */
 
 					if (u < interlink->utf_8.min)
 						u = UCS_NO_CHAR;
-					term_send_ucs(term, &tev, u);
+					term_send_ucs(term, u,
+						      term->interlink->utf_8.modifier);
 				}
 				break;
 
 			} else {
 				interlink->utf_8.len = 0;
-				term_send_ucs(term, &tev, UCS_NO_CHAR);
+				term_send_ucs(term, UCS_NO_CHAR,
+					      term->interlink->utf_8.modifier);
 			}
 		}
 
 		if (key < 0x80 || key > 0xFF || !utf8_io) {
+#ifdef CONFIG_UTF_8
+			if (key >= 0 && key <= 0xFF && !utf8_io) {
+				key = cp2u(get_opt_codepage_tree(term->spec,
+								 "charset"),
+					   key);
+				term_send_ucs(term, key, modifier);
+				break;
+			}
+#endif /* CONFIG_UTF_8 */
+			set_kbd_term_event(&tev, key, modifier);
 			term_send_event(term, &tev);
 			break;
 
@@ -324,11 +356,11 @@ #endif /* CONFIG_UTF_8 */
 
 			interlink->utf_8.len = len - 1;
 			interlink->utf_8.ucs = key & (mask - 1);
-			interlink->utf_8.modifier = get_kbd_modifier(&tev);
+			interlink->utf_8.modifier = modifier;
 			break;
 		}
 
-		term_send_ucs(term, &tev, UCS_NO_CHAR);
+		term_send_ucs(term, UCS_NO_CHAR, KBD_MOD_NONE);
 		break;
 	}

pgpIgW6ImDkTU.pgp
Description: PGP signature

_______________________________________________
elinks-dev mailing list
[email protected]
http://linuxfromscratch.org/mailman/listinfo/elinks-dev

[elinks-dev] [patch 3/5] #ifdef CONFIG_UTF_8, translate all terminal input via UCS-4.

Reply via email to