On 11.09.18 22:38, Heinrich Schuchardt wrote: > Up to now the EFI_TEXT_INPUT_PROTOCOL only supported ASCII characters. > With the patch it can consume UTF-8 from the console. > > Currently only the serial console and the console can deliver UTF-8. > Local consoles are restricted to ASCII. > > Signed-off-by: Heinrich Schuchardt <xypron.g...@gmx.de> > --- > v2: > drop support for German keyboard > move reading of Unicode code to charset.c > --- > include/charset.h | 9 +++ > lib/charset.c | 136 ++++++++++++++++++++++------------- > lib/efi_loader/efi_console.c | 13 ++-- > test/unicode_ut.c | 8 +-- > 4 files changed, 108 insertions(+), 58 deletions(-) > > diff --git a/include/charset.h b/include/charset.h > index 686db5a1fe..a7de5f6948 100644 > --- a/include/charset.h > +++ b/include/charset.h > @@ -8,11 +8,20 @@ > #ifndef __CHARSET_H_ > #define __CHARSET_H_ > > +#include <efi.h>
Yeah ... eh ... no :). I assume this is just a leftover from the old version? > #include <linux/kernel.h> > #include <linux/types.h> > > #define MAX_UTF8_PER_UTF16 3 > > +/** > + * console_read_unicode() - read Unicode code point from console > + * > + * @code: code point Please specify this a bit more clearly. > + * Return: 0 = success > + */ > +int console_read_unicode(s32 *code); > + > /** > * utf8_get() - get next UTF-8 code point from buffer > * > diff --git a/lib/charset.c b/lib/charset.c > index 72c808ce64..1806b41cc3 100644 > --- a/lib/charset.c > +++ b/lib/charset.c > @@ -5,6 +5,7 @@ > * Copyright (c) 2017 Rob Clark > */ > > +#include <common.h> > #include <charset.h> > #include <capitalization.h> > #include <malloc.h> > @@ -18,67 +19,106 @@ static struct capitalization_table > capitalization_table[] = > CP437_CAPITALIZATION_TABLE; > #endif > > -s32 utf8_get(const char **src) > +/** > + * get_code() - read Unicode code point from UTF-8 stream > + * > + * @read_u8: - stream reader > + * @src: - string buffer passed to stream reader, optional > + * Return: - Unicode code point > + */ > +static int get_code(u8 (*read_u8)(void *data), void *data) > { > - s32 code = 0; > - unsigned char c; > + s32 ch = 0; > > - if (!src || !*src) > - return -1; > - if (!**src) > + ch = read_u8(data); > + if (!ch) > return 0; > - c = **src; > - if (c >= 0x80) { > - ++*src; > - if (!**src) > - return -1; > - /* > - * We do not expect a continuation byte (0x80 - 0xbf). > - * 0x80 is coded as 0xc2 0x80, so we cannot have less then 0xc2 > - * here. > - * The highest code point is 0x10ffff which is coded as > - * 0xf4 0x8f 0xbf 0xbf. So we cannot have a byte above 0xf4. 
> - */ > - if (c < 0xc2 || code > 0xf4) > - return -1; > - if (c >= 0xe0) { > - if (c >= 0xf0) { > + if (ch >= 0xc2 && ch <= 0xf4) { > + int code = 0; > + > + if (ch >= 0xe0) { > + if (ch >= 0xf0) { > /* 0xf0 - 0xf4 */ > - c &= 0x07; > - code = c << 18; > - c = **src; > - ++*src; > - if (!**src) > - return -1; > - if (c < 0x80 || c > 0xbf) > - return -1; > - c &= 0x3f; > + ch &= 0x07; > + code = ch << 18; > + ch = read_u8(data); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > + ch &= 0x3f; > } else { > /* 0xe0 - 0xef */ > - c &= 0x0f; > + ch &= 0x0f; > } > - code += c << 12; > + code += ch << 12; > if ((code >= 0xD800 && code <= 0xDFFF) || > code >= 0x110000) > - return -1; > - c = **src; > - ++*src; > - if (!**src) > - return -1; > - if (c < 0x80 || c > 0xbf) > - return -1; > + goto error; > + ch = read_u8(data); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > } > /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */ > - c &= 0x3f; > - code += c << 6; > - c = **src; > - if (c < 0x80 || c > 0xbf) > - return -1; > - c &= 0x3f; > + ch &= 0x3f; > + code += ch << 6; > + ch = read_u8(data); > + if (ch < 0x80 || ch > 0xbf) > + goto error; > + ch &= 0x3f; > + ch += code; > + } else if (ch >= 0x80) { > + goto error; > } > - code += c; > + return ch; > +error: > + return '?'; > +} > + > +/** > + * read_string() - read byte from character string > + * > + * @data: - pointer to string > + * Return: - byte read > + * > + * The string pointer is incremented if it does not point to '\0'. > + */ > +static u8 read_string(void *data) > + > +{ > + const char **src = (const char **)data; > + u8 c; > + > + if (!src || !*src || !**src) > + return 0; > + c = (unsigned char)**src; Please remove the cast. 
Btw, you could also write this as return *(*src)++; > ++*src; > - return code; > + return c; > +} > + > +/** > + * read_console() - read byte from console > + * > + * @src - not used, needed to match interface > + * Return: - byte read > + */ > +static u8 read_console(void *data) > +{ > + return getc(); > +} > + > +int console_read_unicode(s32 *code) > +{ > + if (!tstc()) > + /* No input available */ > + return 1; Please avoid multi-line indented code without braces. > + > + /* Read Unicode code */ > + *code = get_code(read_console, NULL); > + return 0; > +} > + > +s32 utf8_get(const char **src) > +{ > + return get_code(read_string, src); > } > Alex _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de https://lists.denx.de/listinfo/u-boot