terminal UTF-8: term_event_keyboard.key is UCS-4, #ifdef CONFIG_UTF_8.

Form fields and BFU text-input widgets then convert from UCS-4 to UTF-8.
If not all UTF-8 bytes fit, they don't insert anything. Thus it is no
longer possible to get invalid UTF-8 by hitting the length limit.
It is unclear to me which charset is supposed to be used for strings
in internal buffers. I made BFU insert UTF-8 whenever CONFIG_UTF_8 is
defined, but form fields use the charset of the terminal; that may
have to be changed.
As a side effect, this change should solve bug 782, because
term_send_ucs no longer encodes in UTF-8 if CONFIG_UTF_8 is defined.
I think the UTF-8 and codepage encoding calls I added are safe, too.
A similar bug may still surface somewhere else, but 782 could be
closed for now.
This change also lays the foundation for binding actions to non-ASCII
keys, but the keystroke name parser doesn't yet support that.
The CONFIG_UTF_8 mode does not currently support non-ASCII characters
in hot keys, either.
---
commit e019e3802e1709737cd6850e4ea3bf90ea585cc6
tree b696459ad3e89fd1f1307f893153c5901eb38827
parent 9e1fcebe33cf251fea537c9a06107004d9ba729b
author Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Sat, 05 Aug 2006 19:38:15 +0300
committer Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Sat, 05 Aug 2006 20:44:06 +0300
src/bfu/inpfield.c | 33 ++++++++++++-----------------
src/terminal/event.c | 19 ++++++++++-------
src/terminal/event.h | 14 ++++++++++--
src/terminal/kbd.h | 4 ++-
src/viewer/text/form.c | 55 +++++++++++++++++++-----------------------------
5 files changed, 61 insertions(+), 64 deletions(-)
diff --git a/src/bfu/inpfield.c b/src/bfu/inpfield.c
index ff7c9c1..dcb948a 100644
--- a/src/bfu/inpfield.c
+++ b/src/bfu/inpfield.c
@@ -680,32 +680,27 @@ #endif /* CONFIG_UTF_8 */
if (check_kbd_textinput_key(ev)) {
unsigned char *text = widget_data->cdata;
int textlen = strlen(text);
+#ifdef CONFIG_UTF_8
+ const unsigned char *ins = encode_utf_8(get_kbd_key(ev));
+ int inslen = utf8charlen(ins);
+#else /* !CONFIG_UTF_8 */
+ const int inslen = 1;
+#endif /* !CONFIG_UTF_8 */
- if (textlen >= widget_data->widget->datalen - 1)
+ if (textlen >= widget_data->widget->datalen - inslen)
goto display_field;
/* Shift to position of the cursor */
textlen -= widget_data->info.field.cpos;
- text += widget_data->info.field.cpos++;
+ text += widget_data->info.field.cpos;
- memmove(text + 1, text, textlen + 1);
- *text = get_kbd_key(ev);
+ memmove(text + inslen, text, textlen + 1);
#ifdef CONFIG_UTF_8
- if (term->utf8) {
- static unsigned char buf[7];
- unsigned char *t = buf;
- static int i = 0;
- unicode_val_T data;
-
- buf[i++] = *text;
- buf[i] = '\0';
- data = utf_8_to_unicode(&t, buf + i);
- if (i == 6) i = 0;
- if (data == UCS_NO_CHAR)
- return EVENT_PROCESSED;
- else i = 0;
- }
-#endif /* CONFIG_UTF_8 */
+ memcpy(text, ins, inslen);
+#else /* !CONFIG_UTF_8 */
+ *text = get_kbd_key(ev);
+#endif /* !CONFIG_UTF_8 */
+ widget_data->info.field.cpos += inslen;
goto display_field;
}
}
diff --git a/src/terminal/event.c b/src/terminal/event.c
index 7737cec..f106fa4 100644
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@@ -129,8 +129,14 @@ term_send_event(struct terminal *term, s
static void
term_send_ucs(struct terminal *term, unicode_val_T u, int modifier)
{
- unsigned char *recoded;
+#ifdef CONFIG_UTF_8
+ struct term_event ev;
+
+ set_kbd_term_event(&ev, u, modifier);
+ term_send_event(term, &ev);
+#else /* !CONFIG_UTF_8 */
struct term_event ev;
+ unsigned char *recoded;
set_kbd_term_event(&ev, KBD_UNDEF, modifier);
recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset"));
@@ -140,6 +146,7 @@ term_send_ucs(struct terminal *term, uni
term_send_event(term, &ev);
recoded++;
}
+#endif /* !CONFIG_UTF_8 */
}
static void
@@ -282,18 +289,14 @@ #endif
/* Character Conversions. */
#ifdef CONFIG_UTF_8
- /* struct term_event_keyboard carries bytes in the
- * charset of the terminal.
+ /* struct term_event_keyboard carries UCS-4.
* - If the "utf_8_io" option (i.e. term->utf8) is
* true or the "charset" option refers to UTF-8,
* then handle_interlink_event() converts from UTF-8
- * to UCS-4, and term_send_ucs() converts from UCS-4
- * to the codepage specified with the "charset" option.
+ * to UCS-4.
* - Otherwise, handle_interlink_event() converts from
* the codepage specified with the "charset" option
- * to UCS-4, and term_send_ucs() converts right back.
- * TO DO: Change struct term_event_keyboard to carry
- * UCS-4 instead, reducing these conversions. */
+ * to UCS-4. */
utf8_io = term->utf8
|| is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
#else
diff --git a/src/terminal/event.h b/src/terminal/event.h
index 9a491e3..63a420a 100644
--- a/src/terminal/event.h
+++ b/src/terminal/event.h
@@ -165,8 +165,18 @@ #define check_kbd_key(event, key) (kbd_k
#define get_kbd_modifier(event) (kbd_get_modifier(&(event)->info.keyboard))
#define check_kbd_modifier(event, mod) (kbd_modifier_is(&(event)->info.keyboard, (mod)))
-#define check_kbd_textinput_key(event) (get_kbd_key(event) >= ' ' && get_kbd_key(event) < 256 && check_kbd_modifier(event, KBD_MOD_NONE))
-#define check_kbd_label_key(event) (get_kbd_key(event) > ' ' && get_kbd_key(event) < 256)
+#define check_kbd_textinput_key(event) (get_kbd_key(event) >= ' ' && check_kbd_modifier(event, KBD_MOD_NONE))
+#ifdef CONFIG_UTF_8
+/* We must currently limit hotkeys of labels to ASCII, because
+ * get_kbd_key(event) is in UCS-4 and various event handlers pass it
+ * to toupper() if check_kbd_label_key() returns true.
+ * TO DO: Change the event handlers to use unicode_fold_label_case()
+ * instead. The code that extracts the hotkey from the label string
+ * will also have to be changed. */
+#define check_kbd_label_key(event) (get_kbd_key(event) > ' ' && get_kbd_key(event) <= 0x7F)
+#else /* !CONFIG_UTF_8 */
+#define check_kbd_label_key(event) (get_kbd_key(event) > ' ')
+#endif /* !CONFIG_UTF_8 */
/* For mouse events handling */
diff --git a/src/terminal/kbd.h b/src/terminal/kbd.h
index b9672e4..a4ee0ca 100644
--- a/src/terminal/kbd.h
+++ b/src/terminal/kbd.h
@@ -7,8 +7,8 @@ struct term_event_keyboard {
/* Values <= -0x100 are special; e.g. KBD_ENTER.
* Values between -0xFF and -2 are not used yet; treat as special.
* Value == -1 is KBD_UNDEF; not sent via socket.
- * Values between 0 and 0xFF are bytes received from the terminal.
- * Values >= 0x100 are not used. */
+ * Values >= 0 are characters received from the terminal;
+ * in UCS-4 #ifdef CONFIG_UTF_8. */
int key;
int modifier;
};
diff --git a/src/viewer/text/form.c b/src/viewer/text/form.c
index 867e001..ee3ad33 100644
--- a/src/viewer/text/form.c
+++ b/src/viewer/text/form.c
@@ -1712,8 +1712,8 @@ #endif /* CONFIG_UTF_8 */
}
if (form_field_is_readonly(fc)
- || strlen(fs->value) >= fc->maxlength
#ifndef CONFIG_UTF_8
+ || strlen(fs->value) >= fc->maxlength
|| !insert_in_string(&fs->value, fs->state, "?", 1)
#endif /* CONFIG_UTF_8 */
)
@@ -1721,42 +1721,31 @@ #endif /* CONFIG_UTF_8 */
status = FRAME_EVENT_OK;
break;
}
+
#ifdef CONFIG_UTF_8
- if (utf8) {
- static unsigned char buf[7];
- static int i = 0;
- unicode_val_T data;
- unsigned char *t;
-
- t = buf;
- buf[i++] = get_kbd_key(ev);
- buf[i] = 0;
- data = utf_8_to_unicode(&t, buf + i);
- if (data != UCS_NO_CHAR) {
- if (!insert_in_string(&fs->value, fs->state, buf, i)) {
- i = 0;
- return FRAME_EVENT_OK;
- }
- fs->state += i;
- if (fc->type == FC_PASSWORD)
- fs->state_cell++;
- else if (fc->type == FC_TEXTAREA)
- fs->state_cell = 0;
- else
- fs->state_cell += unicode_to_cell(data);
- i = 0;
+ {
+ /* The charset of the terminal; we assume
+ * fs->value is in this charset.
+ * (Is that OK?) */
+ int cp = get_opt_codepage_tree(ses->tab->term->spec,
+ "charset");
+
+ text = u2cp_no_nbsp(get_kbd_key(ev), cp);
+ length = strlen(text);
+
+ if (strlen(fs->value) + length > fc->maxlength
+ || !insert_in_string(&fs->value, fs->state, text, length)) {
+ status = FRAME_EVENT_OK;
break;
}
- if (i == 6) {
- i = 0;
- }
- return FRAME_EVENT_OK;
-
- } else {
- if (!insert_in_string(&fs->value, fs->state, "?", 1))
- return FRAME_EVENT_OK;
- fs->value[fs->state++] = get_kbd_key(ev);
+ fs->state += length;
+ if (fc->type == FC_PASSWORD)
+ fs->state_cell += (is_cp_utf8(cp) ? 1 : length);
+ else if (fc->type == FC_TEXTAREA)
+ fs->state_cell = 0;
+ else
+ fs->state_cell += (is_cp_utf8(cp) ? unicode_to_cell(get_kbd_key(ev)) : length);
}
#else
fs->value[fs->state++] = get_kbd_key(ev);
pgpMlnJs76OFS.pgp
Description: PGP signature
_______________________________________________ elinks-dev mailing list [email protected] http://linuxfromscratch.org/mailman/listinfo/elinks-dev
