On 01.09.18 11:43, Alexander Graf wrote: > > > On 31.08.18 21:31, Heinrich Schuchardt wrote: >> This patch provides a define to initialize a table that maps lower to >> capital letters for Unicode code point 0x0000 - 0xffff. >> >> Signed-off-by: Heinrich Schuchardt <xypron.g...@gmx.de> >> --- >> v2 >> add shorter tables for code pages 437 and 1250 >> --- >> MAINTAINERS | 1 + >> include/capitalization.h | 2028 ++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 2029 insertions(+) >> create mode 100644 include/capitalization.h >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 46f826a0fe..8c9cd83347 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -381,6 +381,7 @@ T: git git://github.com/agraf/u-boot.git >> F: doc/README.uefi >> F: doc/README.iscsi >> F: Documentation/efi.rst >> +F: include/capitalization.h >> F: include/efi* >> F: include/pe.h >> F: include/asm-generic/pe.h >> diff --git a/include/capitalization.h b/include/capitalization.h >> new file mode 100644 >> index 0000000000..2c24e1bf47 >> --- /dev/null >> +++ b/include/capitalization.h >> @@ -0,0 +1,2028 @@ >> +/* SPDX-License-Identifier: Unicode-DFS-2016 */ >> +/* >> + * Capitalization tables >> + */ >> + >> +struct capitalization_table { >> + u16 upper; >> + u16 lower; >> +}; >> + >> +/* >> + * Correspondence table for small and capital Unicode letters in the range >> of >> + * 0x0000 - 0xffff based on >> http://www.unicode.org/Public/UCA/11.0.0/allkeys.txt >> + */ >> +#define UNICODE_CAPITALIZATION_TABLE { \ >> + { 0x0531, /* ARMENIAN CAPITAL LETTER AYB */ \ >> + 0x0561, /* ARMENIAN SMALL LETTER AYB */ }, \ > > > [...] > >> + { 0x2C7F, /* LATIN CAPITAL LETTER Z WITH SWASH TAIL */ \ >> + 0x0240, /* LATIN SMALL LETTER Z WITH SWASH TAIL */ }, \ >> + { 0x0000, /* END OF LIST CAPITAL LETTERS */ \ >> + 0x0000, /* END OF LIST SMALL LETTERS */ }, \ >> +} >> + >> +/* >> + * Correspondence table for small and capital letters of codepage 437. 
>> + */ >> +#define CP437_CAPITALIZATION_TABLE { \ >> + { 0x03a6, 0x03c6, }, \ > > I like how you added comments to each entry on what exactly the > character is. Please keep that habit in the trimmed-down tables too. > >> + { 0x03a3, 0x03c3, }, \ >> + { 0x0041, 0x0061, }, \ >> + { 0x00c4, 0x00e4, }, \ >> + { 0x00c5, 0x00e5, }, \ >> + { 0x00c6, 0x00e6, }, \ >> + { 0x0042, 0x0062, }, \ >> + { 0x0043, 0x0063, }, \ >> + { 0x00c7, 0x00e7, }, \ >> + { 0x0044, 0x0064, }, \ >> + { 0x0045, 0x0065, }, \ >> + { 0x00c9, 0x00e9, }, \ >> + { 0x0046, 0x0066, }, \ >> + { 0x0047, 0x0067, }, \ >> + { 0x0048, 0x0068, }, \ >> + { 0x0049, 0x0069, }, \ >> + { 0x004a, 0x006a, }, \ >> + { 0x004b, 0x006b, }, \ >> + { 0x004c, 0x006c, }, \ >> + { 0x004d, 0x006d, }, \ >> + { 0x004e, 0x006e, }, \ >> + { 0x00d1, 0x00f1, }, \ >> + { 0x004f, 0x006f, }, \ >> + { 0x00d6, 0x00f6, }, \ >> + { 0x0050, 0x0070, }, \ >> + { 0x0051, 0x0071, }, \ > > Most of these are just Latin A to Z. These are already covered in your > conversion by code, no? So you can just omit them. > >> + { 0x0052, 0x0072, }, \ >> + { 0x0053, 0x0073, }, \ >> + { 0x0054, 0x0074, }, \ >> + { 0x0055, 0x0075, }, \ >> + { 0x00dc, 0x00fc, }, \ >> + { 0x0056, 0x0076, }, \ >> + { 0x0057, 0x0077, }, \ >> + { 0x0058, 0x0078, }, \ >> + { 0x0059, 0x0079, }, \ >> + { 0x005a, 0x007a, }, \ >> + { 0x0000, 0x0000, }, \ > > ... that would leave 11 entries for cp437 ... > >> +} >> + >> +/* >> + * Correspondence table for small and capital letters of codepage 1250. >> + */ >> +#define CP1250_CAPITALIZATION_TABLE { \ >> + { 0x0041, 0x0061, }, \ > > Please sort the list by code point - or any other recognizable sorting > order ;). 
> >> + { 0x00c1, 0x00e1, }, \ >> + { 0x0102, 0x0103, }, \ >> + { 0x00c2, 0x00e2, }, \ >> + { 0x00c4, 0x00e4, }, \ >> + { 0x0104, 0x0105, }, \ >> + { 0x0042, 0x0062, }, \ >> + { 0x0043, 0x0063, }, \ >> + { 0x0106, 0x0107, }, \ >> + { 0x010c, 0x010d, }, \ >> + { 0x00c7, 0x00e7, }, \ >> + { 0x0044, 0x0064, }, \ >> + { 0x010e, 0x010f, }, \ >> + { 0x0110, 0x0111, }, \ >> + { 0x0045, 0x0065, }, \ >> + { 0x00c9, 0x00e9, }, \ >> + { 0x011a, 0x011b, }, \ >> + { 0x00cb, 0x00eb, }, \ >> + { 0x0118, 0x0119, }, \ >> + { 0x0046, 0x0066, }, \ >> + { 0x0047, 0x0067, }, \ >> + { 0x0048, 0x0068, }, \ >> + { 0x0049, 0x0069, }, \ >> + { 0x00cd, 0x00ed, }, \ >> + { 0x00ce, 0x00ee, }, \ >> + { 0x004a, 0x006a, }, \ >> + { 0x004b, 0x006b, }, \ >> + { 0x004c, 0x006c, }, \ >> + { 0x0139, 0x013a, }, \ >> + { 0x013d, 0x013e, }, \ >> + { 0x0141, 0x0142, }, \ >> + { 0x004d, 0x006d, }, \ >> + { 0x004e, 0x006e, }, \ >> + { 0x0143, 0x0144, }, \ >> + { 0x0147, 0x0148, }, \ >> + { 0x004f, 0x006f, }, \ >> + { 0x00d3, 0x00f3, }, \ >> + { 0x00d4, 0x00f4, }, \ >> + { 0x00d6, 0x00f6, }, \ >> + { 0x0150, 0x0151, }, \ >> + { 0x0050, 0x0070, }, \ >> + { 0x0051, 0x0071, }, \ >> + { 0x0052, 0x0072, }, \ >> + { 0x0154, 0x0155, }, \ >> + { 0x0158, 0x0159, }, \ >> + { 0x0053, 0x0073, }, \ >> + { 0x015a, 0x015b, }, \ >> + { 0x0160, 0x0161, }, \ >> + { 0x015e, 0x015f, }, \ >> + { 0x0054, 0x0074, }, \ >> + { 0x0164, 0x0165, }, \ >> + { 0x0162, 0x0163, }, \ >> + { 0x0055, 0x0075, }, \ >> + { 0x00da, 0x00fa, }, \ >> + { 0x00dc, 0x00fc, }, \ >> + { 0x0170, 0x0171, }, \ >> + { 0x016e, 0x016f, }, \ >> + { 0x0056, 0x0076, }, \ >> + { 0x0057, 0x0077, }, \ >> + { 0x0058, 0x0078, }, \ >> + { 0x0059, 0x0079, }, \ >> + { 0x00dd, 0x00fd, }, \ >> + { 0x005a, 0x007a, }, \ >> + { 0x0179, 0x017a, }, \ >> + { 0x017d, 0x017e, }, \ >> + { 0x017b, 0x017c, }, \ > > ... and 40 unique points for cp1250. > > How about we just combine the two tables into one and call it "western"?
Actually, thinking about it again, keeping the tables separate is probably a good idea. Alex _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de https://lists.denx.de/listinfo/u-boot