billiob pushed a commit to branch master. http://git.enlightenment.org/apps/terminology.git/commit/?id=8c970b88047882fe95907c06b08ff31eb2a6cad3
commit 8c970b88047882fe95907c06b08ff31eb2a6cad3 Author: Boris Faure <bill...@gmail.com> Date: Sun Jul 5 21:40:42 2020 +0200 termptydbl: generate the exact double width test This based on unicode 13.0: https://www.unicode.org/reports/tr44/ Code is generated by tools/unicode_dbl_width.py I'm using switch-case + fall through as I've found it was the best: https://github.com/billiob/terminology/commit/f46d550a8b5a8fba1522796c5c7f6a0672070e73 --- src/bin/termptydbl.c | 676 ++++++++++++++++++++------------------------- src/bin/termptydbl.h | 25 +- tools/unicode_dbl_width.py | 190 +++++++++++++ 3 files changed, 494 insertions(+), 397 deletions(-) diff --git a/src/bin/termptydbl.c b/src/bin/termptydbl.c index a4362d7..691c062 100644 --- a/src/bin/termptydbl.c +++ b/src/bin/termptydbl.c @@ -1,396 +1,308 @@ +/* XXX: Code generated by tool unicode_dbl_width.py */ #include "private.h" #include <Elementary.h> #include "termpty.h" #include "termptydbl.h" +__attribute__((const)) Eina_Bool -_termpty_is_dblwidth_slow_get(const Termpty *ty, int g) +_termpty_is_ambigous_wide(Eina_Unicode g) { - // check for east asian full-width (F), half-width (H), wide (W), - // narrow (Na) or ambiguous (A) codepoints - // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt + switch (g) + { + case 0xA1: EINA_FALLTHROUGH; + case 0xA4: EINA_FALLTHROUGH; + case 0xA7 ... 0xAA: EINA_FALLTHROUGH; + case 0xAD ... 0xAE: EINA_FALLTHROUGH; + case 0xB0 ... 0xB4: EINA_FALLTHROUGH; + case 0xB6 ... 0xBA: EINA_FALLTHROUGH; + case 0xBC ... 0xBF: EINA_FALLTHROUGH; + case 0xC6: EINA_FALLTHROUGH; + case 0xD0: EINA_FALLTHROUGH; + case 0xD7 ... 0xD8: EINA_FALLTHROUGH; + case 0xDE ... 0xE1: EINA_FALLTHROUGH; + case 0xE6: EINA_FALLTHROUGH; + case 0xE8 ... 0xEA: EINA_FALLTHROUGH; + case 0xEC ... 0xED: EINA_FALLTHROUGH; + case 0xF0: EINA_FALLTHROUGH; + case 0xF2 ... 0xF3: EINA_FALLTHROUGH; + case 0xF7 ... 
0xFA: EINA_FALLTHROUGH; + case 0xFC: EINA_FALLTHROUGH; + case 0xFE: EINA_FALLTHROUGH; + case 0x101: EINA_FALLTHROUGH; + case 0x111: EINA_FALLTHROUGH; + case 0x113: EINA_FALLTHROUGH; + case 0x11B: EINA_FALLTHROUGH; + case 0x126 ... 0x127: EINA_FALLTHROUGH; + case 0x12B: EINA_FALLTHROUGH; + case 0x131 ... 0x133: EINA_FALLTHROUGH; + case 0x138: EINA_FALLTHROUGH; + case 0x13F ... 0x142: EINA_FALLTHROUGH; + case 0x144: EINA_FALLTHROUGH; + case 0x148 ... 0x14B: EINA_FALLTHROUGH; + case 0x14D: EINA_FALLTHROUGH; + case 0x152 ... 0x153: EINA_FALLTHROUGH; + case 0x166 ... 0x167: EINA_FALLTHROUGH; + case 0x16B: EINA_FALLTHROUGH; + case 0x1CE: EINA_FALLTHROUGH; + case 0x1D0: EINA_FALLTHROUGH; + case 0x1D2: EINA_FALLTHROUGH; + case 0x1D4: EINA_FALLTHROUGH; + case 0x1D6: EINA_FALLTHROUGH; + case 0x1D8: EINA_FALLTHROUGH; + case 0x1DA: EINA_FALLTHROUGH; + case 0x1DC: EINA_FALLTHROUGH; + case 0x251: EINA_FALLTHROUGH; + case 0x261: EINA_FALLTHROUGH; + case 0x2C4: EINA_FALLTHROUGH; + case 0x2C7: EINA_FALLTHROUGH; + case 0x2C9 ... 0x2CB: EINA_FALLTHROUGH; + case 0x2CD: EINA_FALLTHROUGH; + case 0x2D0: EINA_FALLTHROUGH; + case 0x2D8 ... 0x2DB: EINA_FALLTHROUGH; + case 0x2DD: EINA_FALLTHROUGH; + case 0x2DF: EINA_FALLTHROUGH; + case 0x300 ... 0x36F: EINA_FALLTHROUGH; + case 0x391 ... 0x3A9: EINA_FALLTHROUGH; + case 0x3B1 ... 0x3C1: EINA_FALLTHROUGH; + case 0x3C3 ... 0x3C9: EINA_FALLTHROUGH; + case 0x401: EINA_FALLTHROUGH; + case 0x410 ... 0x44F: EINA_FALLTHROUGH; + case 0x451: EINA_FALLTHROUGH; + case 0x1100 ... 0x115F: EINA_FALLTHROUGH; + case 0x2010: EINA_FALLTHROUGH; + case 0x2013 ... 0x2016: EINA_FALLTHROUGH; + case 0x2018 ... 0x2019: EINA_FALLTHROUGH; + case 0x201C ... 0x201D: EINA_FALLTHROUGH; + case 0x2020 ... 0x2022: EINA_FALLTHROUGH; + case 0x2024 ... 0x2027: EINA_FALLTHROUGH; + case 0x2030: EINA_FALLTHROUGH; + case 0x2032 ... 0x2033: EINA_FALLTHROUGH; + case 0x2035: EINA_FALLTHROUGH; + case 0x203B ... 
0x203C: EINA_FALLTHROUGH; + case 0x203E: EINA_FALLTHROUGH; + case 0x2049: EINA_FALLTHROUGH; + case 0x2074: EINA_FALLTHROUGH; + case 0x207F: EINA_FALLTHROUGH; + case 0x2081 ... 0x2084: EINA_FALLTHROUGH; + case 0x20AC: EINA_FALLTHROUGH; + case 0x2103: EINA_FALLTHROUGH; + case 0x2105: EINA_FALLTHROUGH; + case 0x2109: EINA_FALLTHROUGH; + case 0x2113: EINA_FALLTHROUGH; + case 0x2116: EINA_FALLTHROUGH; + case 0x2121 ... 0x2122: EINA_FALLTHROUGH; + case 0x2126: EINA_FALLTHROUGH; + case 0x212B: EINA_FALLTHROUGH; + case 0x2139: EINA_FALLTHROUGH; + case 0x2153 ... 0x2154: EINA_FALLTHROUGH; + case 0x215B ... 0x215E: EINA_FALLTHROUGH; + case 0x2160 ... 0x216B: EINA_FALLTHROUGH; + case 0x2170 ... 0x2179: EINA_FALLTHROUGH; + case 0x2189: EINA_FALLTHROUGH; + case 0x2190 ... 0x2199: EINA_FALLTHROUGH; + case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH; + case 0x21B8 ... 0x21B9: EINA_FALLTHROUGH; + case 0x21D2: EINA_FALLTHROUGH; + case 0x21D4: EINA_FALLTHROUGH; + case 0x21E7: EINA_FALLTHROUGH; + case 0x2200: EINA_FALLTHROUGH; + case 0x2202 ... 0x2203: EINA_FALLTHROUGH; + case 0x2207 ... 0x2208: EINA_FALLTHROUGH; + case 0x220B: EINA_FALLTHROUGH; + case 0x220F: EINA_FALLTHROUGH; + case 0x2211: EINA_FALLTHROUGH; + case 0x2215: EINA_FALLTHROUGH; + case 0x221A: EINA_FALLTHROUGH; + case 0x221D ... 0x2220: EINA_FALLTHROUGH; + case 0x2223: EINA_FALLTHROUGH; + case 0x2225: EINA_FALLTHROUGH; + case 0x2227 ... 0x222C: EINA_FALLTHROUGH; + case 0x222E: EINA_FALLTHROUGH; + case 0x2234 ... 0x2237: EINA_FALLTHROUGH; + case 0x223C ... 0x223D: EINA_FALLTHROUGH; + case 0x2248: EINA_FALLTHROUGH; + case 0x224C: EINA_FALLTHROUGH; + case 0x2252: EINA_FALLTHROUGH; + case 0x2260 ... 0x2261: EINA_FALLTHROUGH; + case 0x2264 ... 0x2267: EINA_FALLTHROUGH; + case 0x226A ... 0x226B: EINA_FALLTHROUGH; + case 0x226E ... 0x226F: EINA_FALLTHROUGH; + case 0x2282 ... 0x2283: EINA_FALLTHROUGH; + case 0x2286 ... 
0x2287: EINA_FALLTHROUGH; + case 0x2295: EINA_FALLTHROUGH; + case 0x2299: EINA_FALLTHROUGH; + case 0x22A5: EINA_FALLTHROUGH; + case 0x22BF: EINA_FALLTHROUGH; + case 0x2312: EINA_FALLTHROUGH; + case 0x231A ... 0x231B: EINA_FALLTHROUGH; + case 0x2328 ... 0x232A: EINA_FALLTHROUGH; + case 0x2388: EINA_FALLTHROUGH; + case 0x23CF: EINA_FALLTHROUGH; + case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH; + case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH; + case 0x2460 ... 0x24E9: EINA_FALLTHROUGH; + case 0x24EB ... 0x254B: EINA_FALLTHROUGH; + case 0x2550 ... 0x2573: EINA_FALLTHROUGH; + case 0x2580 ... 0x258F: EINA_FALLTHROUGH; + case 0x2592 ... 0x2595: EINA_FALLTHROUGH; + case 0x25A0 ... 0x25A1: EINA_FALLTHROUGH; + case 0x25A3 ... 0x25AB: EINA_FALLTHROUGH; + case 0x25B2 ... 0x25B3: EINA_FALLTHROUGH; + case 0x25B6 ... 0x25B7: EINA_FALLTHROUGH; + case 0x25BC ... 0x25BD: EINA_FALLTHROUGH; + case 0x25C0 ... 0x25C1: EINA_FALLTHROUGH; + case 0x25C6 ... 0x25C8: EINA_FALLTHROUGH; + case 0x25CB: EINA_FALLTHROUGH; + case 0x25CE ... 0x25D1: EINA_FALLTHROUGH; + case 0x25E2 ... 0x25E5: EINA_FALLTHROUGH; + case 0x25EF: EINA_FALLTHROUGH; + case 0x25FB ... 0x25FE: EINA_FALLTHROUGH; + case 0x2600 ... 0x2612: EINA_FALLTHROUGH; + case 0x2614 ... 0x2685: EINA_FALLTHROUGH; + case 0x2690 ... 0x2705: EINA_FALLTHROUGH; + case 0x2708 ... 0x2712: EINA_FALLTHROUGH; + case 0x2714: EINA_FALLTHROUGH; + case 0x2716: EINA_FALLTHROUGH; + case 0x271D: EINA_FALLTHROUGH; + case 0x2721: EINA_FALLTHROUGH; + case 0x2728: EINA_FALLTHROUGH; + case 0x2733 ... 0x2734: EINA_FALLTHROUGH; + case 0x273D: EINA_FALLTHROUGH; + case 0x2744: EINA_FALLTHROUGH; + case 0x2747: EINA_FALLTHROUGH; + case 0x274C: EINA_FALLTHROUGH; + case 0x274E: EINA_FALLTHROUGH; + case 0x2753 ... 0x2755: EINA_FALLTHROUGH; + case 0x2757: EINA_FALLTHROUGH; + case 0x2763 ... 0x2767: EINA_FALLTHROUGH; + case 0x2776 ... 0x277F: EINA_FALLTHROUGH; + case 0x2795 ... 
0x2797: EINA_FALLTHROUGH; + case 0x27A1: EINA_FALLTHROUGH; + case 0x27B0: EINA_FALLTHROUGH; + case 0x27BF: EINA_FALLTHROUGH; + case 0x2934 ... 0x2935: EINA_FALLTHROUGH; + case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH; + case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH; + case 0x2B50: EINA_FALLTHROUGH; + case 0x2B55 ... 0x2B59: EINA_FALLTHROUGH; + case 0x2E80 ... 0x303E: EINA_FALLTHROUGH; + case 0x3041 ... 0x4DBF: EINA_FALLTHROUGH; + case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH; + case 0xA960 ... 0xA97C: EINA_FALLTHROUGH; + case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH; + case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH; + case 0xFE00 ... 0xFE19: EINA_FALLTHROUGH; + case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH; + case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH; + case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH; + case 0xFFFD: EINA_FALLTHROUGH; + case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH; + case 0x1F000 ... 0x1F10A: EINA_FALLTHROUGH; + case 0x1F10D ... 0x1F12D: EINA_FALLTHROUGH; + case 0x1F12F ... 0x1F169: EINA_FALLTHROUGH; + case 0x1F16C ... 0x1F1AD: EINA_FALLTHROUGH; + case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH; + case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH; + case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH; + case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH; + case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH; + case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH; + case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH; + case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH; + case 0x20000 ... 0x3134A: EINA_FALLTHROUGH; + case 0xE0100 ... 0xE01EF: - // emoji should be double since unicode 9 (was single before): - // http://www.unicode.org/emoji/charts/full-emoji-list.html - // - // [ 0x0080 -> 0x02AF] !!! handle carefully ** - // [ 0x1DC0 -> 0x1DFF] - // [ 0x1E00 -> 0x1EFF] - // [ 0x2000 -> 0x209F] !!! handle carefully ** - // [ 0x20D0 -> 0x214F] - // [ 0x2190 -> 0x23FF] - // [ 0x2460 -> 0x24FF] - // [ 0x2600 -> 0x262F] - // [ 0x2638 -> 0x27EF] - // [ 0x2900 -> 0x29FF] - // [ 0x2B00 -> 0x2BFF] !!! 
unicode only 2B55 2B50 - // [ 0x2C60 -> 0x2C7F] - // [ 0x2E00 -> 0x2E7F] - // [ 0x3000 -> 0x303F] !! not 33D1 - // [ 0xA490 -> 0xA4CF] - // [0x1F000 -> 0x1F02F] - // [0x1F0A0 -> 0x1F0FF] - // [0x1F100 -> 0x1F64F] - // [0x1F680 -> 0x1F6FF] - // [0x1F910 -> 0x1F96B] - // [0x1F980 -> 0x1F9E0] - // - // ** these range include these odities: - // © (copyright) 00A9 - // ® (registered) 00AE - // ‼ (double exclamation) 203C - // ⁉ (exclamation questionmark) 2049 - // which should be single width, so ignore them - - // (W) - // optimization: only look into more detailed ranges if within larger block - if ((g >= 0x1100) && (g <= 0x3FFFD)) - { - if ( - // 1XXX - ((g >= 0x1100) && (g <= 0x115f)) || // Hangul Jamo - // 2XXX - ((g == 0x2329) || (g == 0x232a)) || // <> - ((g >= 0x2e80) && (g <= 0x2ffb)) || // Radical supplements - // 3XXX -> A4C6 - ((g >= 0x3001) && (g <= 0x303f)) || // CJK Symbols and Punctuation - ((g >= 0x3041) && (g <= 0x3247)) || // Hiragana, Katakana, - // Bopomoto, Hangul - // Compatibility Jamo, Kanbun, - // Bopomofo Extended, CJK - // Strokes, Katana Phonetic - // Extensions, Enclosed CJK - // Letters and Months - ((g >= 0x3250) && (g <= 0x33D0)) || // Enclosed CJK Letters and - // Months, CJK Compatibility - // [ symbols used by "powerline" ] - ((g >= 0x33D2) && (g <= 0x4dbf)) || // CJK Compatibility, CJK - // Unified Ideographs - // Extension A, Yijing - // Hexagram Symbols - ((g >= 0x4e00) && (g <= 0xa4c6)) || // CJK Unified Ideographs, - // Yi Syllables, Yi Radicals - // aXXX - ((g >= 0xa960) && (g <= 0xa97c)) || // Hangul Jamo Extended A - ((g >= 0xac00) && (g <= 0xd7a3)) || // Hangul Syllables - // fXXX - ((g >= 0xf900) && (g <= 0xfaff)) || // CJK Compatibility Ideographs - ((g >= 0xfe10) && (g <= 0xfe19)) || // Vertical Forms - ((g >= 0xfe30) && (g <= 0xfe6b)) || // CJK Compatibility Forms, - // Small Forms Variant - // 1XXXX - ((g >= 0x1b000) && (g <= 0x1b11e)) || // Kana Supplement, Kana - // Extended A - ((g >= 0x1b170) && (g <= 0x1b2fb)) || 
// Nushu - ((g >= 0x1f200) && (g <= 0x1f202)) || // Enclosed Ideographic - // Supplement - ((g >= 0x1f210) && (g <= 0x1f265)) || // Enclosed Ideographic - // Supplement - // 2XXXX - ((g >= 0x20000) && (g <= 0x2fffd)) || // CJK - // 3XXXX - ((g >= 0x30000) && (g <= 0x3fffd))) return EINA_TRUE; - } - if (ty->config->emoji_dbl_width && ((g >= 0x1f004) && (g <= 0x1f9c0))) - { - /* Taken from - * https://github.com/ridiculousfish/widecharwidth/blob/master/widechar_width.h - */ - const uint16_t u = (g & 0xfff); - if ( (u == 0x004) || - (u == 0x0cf) || - ((u >= 0x170) && (u <= 0x171)) || - ((u >= 0x17e) && (u <= 0x17f)) || - (u == 0x18e) || - ((u >= 0x191) && (u <= 0x19a)) || - ((u >= 0x1e6) && (u <= 0x1ff)) || - ((u >= 0x201) && (u < 0x202)) || - (u == 0x21a) || - (u == 0x22f) || - ((u >= 0x232) && (u <= 0x23a)) || - ((u >= 0x250) && (u <= 0x251)) || - ((u >= 0x300) && (u <= 0x321)) || - ((u >= 0x324) && (u <= 0x393)) || - ((u >= 0x396) && (u <= 0x397)) || - ((u >= 0x399) && (u <= 0x39B)) || - ((u >= 0x39E) && (u <= 0x3F0)) || - ((u >= 0x3F3) && (u <= 0x3F5)) || - ((u >= 0x3F7) && (u <= 0x4FD)) || - ((u >= 0x4FF) && (u <= 0x53D)) || - ((u >= 0x549) && (u <= 0x54E)) || - ((u >= 0x550) && (u <= 0x567)) || - ((u >= 0x56F) && (u <= 0x570)) || - ((u >= 0x573) && (u <= 0x579)) || - (u == 0x587) || - ((u >= 0x58A) && (u <= 0x58D)) || - (u == 0x590) || - ((u >= 0x595) && (u <= 0x596)) || - (u == 0x5A5) || - (u == 0x5A8) || - ((u >= 0x5B1) && (u <= 0x5B2)) || - (u == 0x5BC) || - ((u >= 0x5C2) && (u <= 0x5C4)) || - ((u >= 0x5D1) && (u <= 0x5D3)) || - ((u >= 0x5DC) && (u <= 0x5DE)) || - (u == 0x5E1) || - (u == 0x5E3) || - (u == 0x5E8) || - (u == 0x5EF) || - (u == 0x5F3) || - ((u >= 0x5FA) && (u <= 0x64F)) || - ((u >= 0x680) && (u <= 0x6C5)) || - ((u >= 0x6CB) && (u <= 0x6D0)) || - ((u >= 0x6E0) && (u <= 0x6E5)) || - (u == 0x6E9) || - ((u >= 0x6EB) && (u <= 0x6EC)) || - (u == 0x6F0) || - (u == 0x6F3) || - ((u >= 0x910) && (u <= 0x918)) || - ((u >= 0x980) && (u <= 0x984)) || - (u == 
0x9C0) - ) - return EINA_TRUE; - } + } + return EINA_FALSE; +} - // FIXME: can optimize by breaking into tree and ranges - // (A) - if (ty->termstate.cjk_ambiguous_wide) - { - // grep ';A #' EastAsianWidth.txt | wc -l - // :( - if ( - // aX - (((g >> 4) == 0xa) && - ( - (g == 0x00a1) || - (g == 0x00a4) || - ((g >= 0x00a7) && (g <= 0x00a8)) || - (g == 0x00aa) || - ((g >= 0x00ad) && (g <= 0x00ae)))) || - // bX - (((g >> 4) == 0xb) && - (((g >= 0x00b0) && (g <= 0x00bf)))) || - // cX - (((g >> 4) == 0xc) && - ((g == 0x00c6))) || - // dX - (((g >> 4) == 0xd) && - ( - (g == 0x00d0) || - ((g >= 0x00d7) && (g <= 0x00d8)) || - ((g >= 0x00de) && (g <= 0x00df)))) || - // eX - (((g >> 4) == 0xe) && - ( - (g == 0x00e0) || - (g == 0x00e1) || - (g == 0x00e6) || - ((g >= 0x00e8) && (g <= 0x00e9)) || - (g == 0x00ea) || - ((g >= 0x00ec) && (g <= 0x00ed)))) || - // fX - (((g >> 4) == 0xf) && - ( - (g == 0x00f0) || - ((g >= 0x00f2) && (g <= 0x00f3)) || - ((g >= 0x00f7) && (g <= 0x00f9)) || - (g == 0x00fa) || - (g == 0x00fc) || - (g == 0x00fe))) || - // 1XX - (((g >> 8) == 0x1) && - ( - (g == 0x0101) || - (g == 0x0111) || - (g == 0x0113) || - (g == 0x011b) || - ((g >= 0x0126) && (g <= 0x0127)) || - (g == 0x012b) || - ((g >= 0x0131) && (g <= 0x0133)) || - (g == 0x0138) || - ((g >= 0x013f) && (g <= 0x0142)) || - (g == 0x0144) || - ((g >= 0x0148) && (g <= 0x014b)) || - (g == 0x014d) || - ((g >= 0x0152) && (g <= 0x0153)) || - ((g >= 0x0166) && (g <= 0x0167)) || - (g == 0x016b) || - (g == 0x01ce) || - (g == 0x01d0) || - (g == 0x01d2) || - (g == 0x01d4) || - (g == 0x01d6) || - (g == 0x01d8) || - (g == 0x01da) || - (g == 0x01dc))) || - // 2XX - (((g >> 8) == 0x2) && - ( - (g == 0x0251) || - (g == 0x0261) || - (g == 0x02c4) || - (g == 0x02c7) || - (g == 0x02c9) || - ((g >= 0x02ca) && (g <= 0x02cb)) || - (g == 0x02cd) || - (g == 0x02d0) || - ((g >= 0x02d8) && (g <= 0x02d9)) || - ((g >= 0x02da) && (g <= 0x02db)) || - (g == 0x02dd) || - (g == 0x02df))) || - // 3XX - (((g >> 8) == 0x3) && - ( - 
((g >= 0x0300) && (g <= 0x036f)) || - ((g >= 0x0391) && (g <= 0x03c9)))) || - // 4XX - (((g >> 8) == 0x4) && - ( - (g == 0x0401) || - ((g >= 0x0410) && (g <= 0x044f)) || - (g == 0x0451))) || - // 2XXX - (((g >> 12) == 0x2) && - ((((g >> 8) == 0x20) && - ( - (g == 0x2010) || - ((g >= 0x2013) && (g <= 0x2016)) || - ((g >= 0x2018) && (g <= 0x2019)) || - (g == 0x201c) || - (g == 0x201d) || - ((g >= 0x2020) && (g <= 0x2022)) || - ((g >= 0x2024) && (g <= 0x2027)) || - (g == 0x2030) || - ((g >= 0x2032) && (g <= 0x2033)) || - (g == 0x2035) || - (g == 0x203b) || - (g == 0x203e) || - (g == 0x2074) || - (g == 0x207f) || - ((g >= 0x2081) && (g <= 0x2084)) || - (g == 0x20ac))) || - (((g >> 8) == 0x21) && - ( - (g == 0x2103) || - (g == 0x2105) || - (g == 0x2109) || - (g == 0x2113) || - (g == 0x2116) || - ((g >= 0x2121) && (g <= 0x2122)) || - (g == 0x2126) || - (g == 0x212b) || - ((g >= 0x2153) && (g <= 0x2154)) || - ((g >= 0x215b) && (g <= 0x215e)) || - ((g >= 0x2160) && (g <= 0x216b)) || - ((g >= 0x2170) && (g <= 0x2179)) || - ((g >= 0x2189) && (g <= 0x2199)) || - ((g >= 0x21b8) && (g <= 0x21b9)) || - (g == 0x21d2) || - (g == 0x21d4) || - (g == 0x21e7))) || - (((g >> 8) == 0x22) && - ( - (g == 0x2200) || - ((g >= 0x2202) && (g <= 0x2203)) || - ((g >= 0x2207) && (g <= 0x2208)) || - (g == 0x220b) || - (g == 0x220f) || - (g == 0x2211) || - (g == 0x2215) || - (g == 0x221a) || - ((g >= 0x221d) && (g <= 0x221f)) || - (g == 0x2220) || - (g == 0x2223) || - (g == 0x2225) || - ((g >= 0x2227) && (g <= 0x222e)) || - ((g >= 0x2234) && (g <= 0x2237)) || - ((g >= 0x223c) && (g <= 0x223d)) || - (g == 0x2248) || - (g == 0x224c) || - (g == 0x2252) || - ((g >= 0x2260) && (g <= 0x2261)) || - ((g >= 0x2264) && (g <= 0x2267)) || - ((g >= 0x226a) && (g <= 0x226b)) || - ((g >= 0x226e) && (g <= 0x226f)) || - ((g >= 0x2282) && (g <= 0x2283)) || - ((g >= 0x2286) && (g <= 0x2287)) || - (g == 0x2295) || - (g == 0x2299) || - (g == 0x22a5) || - (g == 0x22bf))) || - (((g >> 8) == 0x23) && - ((g == 0x2312))) 
|| - ((((g >> 8) == 0x24) || ((g >> 8) == 0x25)) && - (((g >= 0x2460) && (g <= 0x2595)))) || - (((g >> 8) == 0x25) && - ( - ((g >= 0x25a0) && (g <= 0x25bd)) || - ((g >= 0x25c0) && (g <= 0x25c1)) || - ((g >= 0x25c6) && (g <= 0x25c7)) || - (g == 0x25c8) || - (g == 0x25cb) || - ((g >= 0x25ce) && (g <= 0x25cf)) || - ((g >= 0x25d0) && (g <= 0x25d1)) || - ((g >= 0x25e2) && (g <= 0x25e3)) || - ((g >= 0x25e4) && (g <= 0x25e5)) || - (g == 0x25ef))) || - (((g >> 8) == 0x26) && - ( - ((g >= 0x2605) && (g <= 0x2606)) || - (g == 0x2609) || - ((g >= 0x260e) && (g <= 0x260f)) || - ((g >= 0x2614) && (g <= 0x2615)) || - (g == 0x261c) || - (g == 0x261e) || - (g == 0x2640) || - (g == 0x2642) || - ((g >= 0x2660) && (g <= 0x2661)) || - ((g >= 0x2663) && (g <= 0x2665)) || - ((g >= 0x2667) && (g <= 0x266a)) || - ((g >= 0x266c) && (g <= 0x266d)) || - (g == 0x266f) || - ((g >= 0x269e) && (g <= 0x269f)) || - ((g >= 0x26be) && (g <= 0x26bf)) || - ((g >= 0x26c4) && (g <= 0x26cd)) || - (g == 0x26cf) || - ((g >= 0x26d0) && (g <= 0x26e1)) || - (g == 0x26e3) || - ((g >= 0x26e8) && (g <= 0x26ff)))) || - (((g >> 8) == 0x27) && - ( - (g == 0x273d) || - (g == 0x2757) || - ((g >= 0x2776) && (g <= 0x277f)))) || - (((g >> 8) == 0x2b) && - (((g >= 0x2b55) && (g <= 0x2b59)))))) || - // 3XXX - (((g >> 12) == 0x3) && - (((g >= 0x3248) && (g <= 0x324f)))) || - // fXXX - (((g >> 12) == 0xf) && - ( - ((g >= 0xfe00) && (g <= 0xfe0f)) || - (g == 0xfffd))) || - // 1XXXX - (((g >> 16) == 0x1) && - ( - ((g >= 0x1f100) && (g <= 0x1f12d)) || - ((g >= 0x1f130) && (g <= 0x1f169)) || - ((g >= 0x1f170) && (g <= 0x1f19a)))) || - // eXXXX - (((g >> 16) == 0xe) && - (((g >= 0xe0100) && (g <= 0xe01ef)))) || - // fXXXX - (((g >> 16) == 0xf) && - (((g >= 0xf0000) && (g <= 0xffffd)))) || - // 1XXXXX - (((g >> 24) == 0x1) && - (((g >= 0x100000) && (g <= 0x10fffd))))) - return EINA_TRUE; - } +__attribute__((const)) +Eina_Bool +_termpty_is_wide(Eina_Unicode g) +{ + switch (g) + { + case 0xA9: EINA_FALLTHROUGH; + case 0xAE: 
EINA_FALLTHROUGH; + case 0x1100 ... 0x115F: EINA_FALLTHROUGH; + case 0x203C: EINA_FALLTHROUGH; + case 0x2049: EINA_FALLTHROUGH; + case 0x2122: EINA_FALLTHROUGH; + case 0x2139: EINA_FALLTHROUGH; + case 0x2194 ... 0x2199: EINA_FALLTHROUGH; + case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH; + case 0x231A ... 0x231B: EINA_FALLTHROUGH; + case 0x2328 ... 0x232A: EINA_FALLTHROUGH; + case 0x2388: EINA_FALLTHROUGH; + case 0x23CF: EINA_FALLTHROUGH; + case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH; + case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH; + case 0x24C2: EINA_FALLTHROUGH; + case 0x25AA ... 0x25AB: EINA_FALLTHROUGH; + case 0x25B6: EINA_FALLTHROUGH; + case 0x25C0: EINA_FALLTHROUGH; + case 0x25FB ... 0x25FE: EINA_FALLTHROUGH; + case 0x2600 ... 0x2605: EINA_FALLTHROUGH; + case 0x2607 ... 0x2612: EINA_FALLTHROUGH; + case 0x2614 ... 0x2685: EINA_FALLTHROUGH; + case 0x2690 ... 0x2705: EINA_FALLTHROUGH; + case 0x2708 ... 0x2712: EINA_FALLTHROUGH; + case 0x2714: EINA_FALLTHROUGH; + case 0x2716: EINA_FALLTHROUGH; + case 0x271D: EINA_FALLTHROUGH; + case 0x2721: EINA_FALLTHROUGH; + case 0x2728: EINA_FALLTHROUGH; + case 0x2733 ... 0x2734: EINA_FALLTHROUGH; + case 0x2744: EINA_FALLTHROUGH; + case 0x2747: EINA_FALLTHROUGH; + case 0x274C: EINA_FALLTHROUGH; + case 0x274E: EINA_FALLTHROUGH; + case 0x2753 ... 0x2755: EINA_FALLTHROUGH; + case 0x2757: EINA_FALLTHROUGH; + case 0x2763 ... 0x2767: EINA_FALLTHROUGH; + case 0x2795 ... 0x2797: EINA_FALLTHROUGH; + case 0x27A1: EINA_FALLTHROUGH; + case 0x27B0: EINA_FALLTHROUGH; + case 0x27BF: EINA_FALLTHROUGH; + case 0x2934 ... 0x2935: EINA_FALLTHROUGH; + case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH; + case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH; + case 0x2B50: EINA_FALLTHROUGH; + case 0x2B55: EINA_FALLTHROUGH; + case 0x2E80 ... 0x303E: EINA_FALLTHROUGH; + case 0x3041 ... 0x3247: EINA_FALLTHROUGH; + case 0x3250 ... 0x4DBF: EINA_FALLTHROUGH; + case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH; + case 0xA960 ... 0xA97C: EINA_FALLTHROUGH; + case 0xAC00 ... 
0xD7A3: EINA_FALLTHROUGH; + case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH; + case 0xFE10 ... 0xFE19: EINA_FALLTHROUGH; + case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH; + case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH; + case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH; + case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH; + case 0x1F000 ... 0x1F0F5: EINA_FALLTHROUGH; + case 0x1F10D ... 0x1F10F: EINA_FALLTHROUGH; + case 0x1F12F: EINA_FALLTHROUGH; + case 0x1F16C ... 0x1F171: EINA_FALLTHROUGH; + case 0x1F17E ... 0x1F17F: EINA_FALLTHROUGH; + case 0x1F18E: EINA_FALLTHROUGH; + case 0x1F191 ... 0x1F19A: EINA_FALLTHROUGH; + case 0x1F1AD: EINA_FALLTHROUGH; + case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH; + case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH; + case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH; + case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH; + case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH; + case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH; + case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH; + case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH; + case 0x20000 ... 
# NOTE: in the archived commit mail, the physical line on which this script
# starts also carried the end of the preceding C hunk and the whole
# termptydbl.h hunk.  That text is not Python; it is kept here as comments
# (wrapped at the diff's line markers) rather than deleted.
#
#   0x3134A:
#   - // Na, H -> not checked
#   + return EINA_TRUE;
#   + }
#   return EINA_FALSE;
#   }
#   diff --git a/src/bin/termptydbl.h b/src/bin/termptydbl.h
#   index 142e16c..ca365b2 100644
#   --- a/src/bin/termptydbl.h
#   +++ b/src/bin/termptydbl.h
#   @@ -1,25 +1,20 @@
#   +/* XXX: Code generated by tool unicode_dbl_width.py */
#   #ifndef _TERMPTY_DBL_H__
#   #define _TERMPTY_DBL_H__ 1
#   -Eina_Bool _termpty_is_dblwidth_slow_get(const Termpty *ty, int g);
#   +Eina_Bool _termpty_is_wide(const Eina_Unicode g);
#   +Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
#   static inline Eina_Bool
#   -_termpty_is_dblwidth_get(const Termpty *ty, int g)
#   +_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
#   {
#   - // check for east asian full-width (F), half-width (H), wide (W),
#   - // narrow (Na) or ambiguous (A) codepoints
#   - // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
#   -
#   - // optimize for latin1 non-ambiguous
#   - if (g <= 0xa0)
#   + /* optimize for latin1 non-ambiguous */
#   + if (g <= 0xA0)
#   return EINA_FALSE;
#   - // (F)
#   - if ((g == 0x3000) ||
#   - ((g >= 0xff01) && (g <= 0xff60)) ||
#   - ((g >= 0xffe0) && (g <= 0xffe6)))
#   - return EINA_TRUE;
#   -
#   - return _termpty_is_dblwidth_slow_get(ty, g);
#   + if (!ty->termstate.cjk_ambiguous_wide)
#   + return _termpty_is_wide(g);
#   + else
#   + return _termpty_is_ambigous_wide(g);
#   }
#   #endif
#   diff --git a/tools/unicode_dbl_width.py b/tools/unicode_dbl_width.py
#   new file mode 100755
#   index 0000000..2457834
#   --- /dev/null
#   +++ b/tools/unicode_dbl_width.py
#   @@ -0,0 +1,190 @@

#!/usr/bin/env python3

"""
Generate src/bin/termptydbl.{c,h} from unicode files
used with ucd.all.flat.xml from
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
"""

import argparse
from collections import namedtuple
import xml.etree.ElementTree as ET

# XML namespace used by the elements of the UCD XML file.
UCD_NS = "{http://www.unicode.org/ns/2003/ucd/1.0}"

# A run of consecutive code points sharing one width class:
# 'N' (narrow), 'A' (ambiguous) or 'W' (wide).  `start`/`end` are inclusive.
Range = namedtuple('Range', ['width', 'start', 'end'])

# Opening of a generated C predicate; `{name}` is the function name.
_SWITCH_PROLOGUE = """
__attribute__((const))
Eina_Bool
{name}(Eina_Unicode g)
{{
   switch (g)
     {{
"""

# Closing of a generated C predicate: every emitted case falls through to
# the single `return EINA_TRUE`.
_SWITCH_EPILOGUE = """
       return EINA_TRUE;
     }
   return EINA_FALSE;
}
"""


def get_ranges(xmlfile, emoji_as_wide):
    """Parse the UCD XML file and return the width runs it describes.

    xmlfile: a path or file object accepted by ElementTree.parse().
    emoji_as_wide: when true, code points whose 'ExtPict' attribute is 'Y'
        are classified as wide regardless of their 'ea' attribute.

    Returns a list of Range tuples in document order.  The 'ea' values
    'Na' and 'H' are folded into 'N', and 'F' into 'W'.  The first run
    always starts at code point 0 with width 'N'.

    Raises ValueError when the <repertoire> element is missing or when a
    <char> element has a missing or unknown 'ea' value.
    """
    root = ET.parse(xmlfile).getroot()
    repertoire = root.find(f"{UCD_NS}repertoire")
    if repertoire is None:
        raise ValueError("no <repertoire> element found in the UCD XML file")

    ranges = []
    current = Range('N', 0, 0)
    for char in repertoire.findall(f"{UCD_NS}char"):
        ea = char.get('ea')
        if ea in ('Na', 'H'):
            ea = 'N'
        elif ea == 'F':
            # The original tested `ea in ('F')`, i.e. a substring test on the
            # string 'F': an empty value was silently treated as wide and a
            # missing one raised TypeError.
            ea = 'W'
        if ea not in ('N', 'A', 'W'):
            raise ValueError(
                f"unexpected East Asian Width {ea!r} "
                f"for code point {char.get('cp')!r}")

        cp = char.get('cp')
        if not cp:
            # Entries without a single 'cp' are skipped, as before.
            # NOTE(review): these are presumably 'first-cp'/'last-cp' range
            # entries (see UAX #42); confirm that ignoring them is intended.
            continue
        if emoji_as_wide and char.get('ExtPict') == 'Y':
            ea = 'W'

        cp = int(cp, 16)
        if ea != current.width:
            ranges.append(current)
            current = Range(ea, cp, cp)
        else:
            current = current._replace(end=cp)

    ranges.append(current)
    return ranges


def merge_ranges(ranges, is_same_width):
    """Coalesce adjacent runs that `is_same_width` considers equivalent.

    `is_same_width(candidate, current)` is called with the next run and the
    run being accumulated.  A merged run keeps the width and start of its
    first member and takes the end of its last.  An empty input yields an
    empty list.
    """
    if not ranges:
        return []
    merged = []
    current = ranges[0]
    for candidate in ranges:
        if is_same_width(candidate, current):
            current = current._replace(end=candidate.end)
        else:
            merged.append(current)
            current = candidate
    merged.append(current)
    return merged


def skip_ranges(ranges, width_skipped):
    """Return the runs whose width is not listed in `width_skipped`."""
    return [r for r in ranges if r.width not in width_skipped]


def gen_header(narrow_range, file_header):
    """Write termptydbl.h to `file_header`.

    `narrow_range` is the leading run of the repertoire; its end is used as
    the upper bound of the inline "cannot be double width" fast path.
    """
    file_header.write(
"""/* XXX: Code generated by tool unicode_dbl_width.py */
#ifndef _TERMPTY_DBL_H__
#define _TERMPTY_DBL_H__ 1

Eina_Bool _termpty_is_wide(const Eina_Unicode g);
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);

static inline Eina_Bool
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
{
   /* optimize for latin1 non-ambiguous */
""")
    file_header.write(f"   if (g <= 0x{narrow_range.end:X})")
    file_header.write(
"""
     return EINA_FALSE;
   if (!ty->termstate.cjk_ambiguous_wide)
     return _termpty_is_wide(g);
   else
     return _termpty_is_ambigous_wide(g);
}

#endif
""")


def _write_width_switch(func_name, ranges, file_source):
    """Emit a C predicate returning EINA_TRUE for code points in `ranges`."""
    file_source.write(_SWITCH_PROLOGUE.format(name=func_name))
    last = len(ranges) - 1
    for idx, r in enumerate(ranges):
        # The final case is directly followed by the return statement, so it
        # needs no fall-through marker.
        fallthrough = "" if idx == last else " EINA_FALLTHROUGH;"
        if r.start == r.end:
            label = f"0x{r.start:X}"
        else:
            label = f"0x{r.start:X} ... 0x{r.end:X}"
        file_source.write(f"      case {label}:{fallthrough}\n")
    file_source.write(_SWITCH_EPILOGUE)


def gen_ambigous(ranges, file_source):
    """Write _termpty_is_ambigous_wide(): 'A' and 'W' runs are both wide.

    The first run (the narrow prefix handled inline by the header) is left
    out.
    """
    def is_same_width(r1, r2):
        if r1.width == 'N':
            return r2.width == 'N'
        return r2.width in ('A', 'W')

    merged = merge_ranges(ranges[1:], is_same_width)
    _write_width_switch("_termpty_is_ambigous_wide",
                        skip_ranges(merged, ('N',)),
                        file_source)


def gen_wide(ranges, file_source):
    """Write _termpty_is_wide(): only 'W' runs are wide.

    The first run (the narrow prefix handled inline by the header) is left
    out.
    """
    def is_same_width(r1, r2):
        if r1.width in ('N', 'A'):
            return r2.width in ('N', 'A')
        return r2.width == 'W'

    merged = merge_ranges(ranges[1:], is_same_width)
    _write_width_switch("_termpty_is_wide",
                        skip_ranges(merged, ('N', 'A')),
                        file_source)


def gen_c(ranges, file_header, file_source):
    """Write the header to `file_header` and both predicates to `file_source`."""
    gen_header(ranges[0], file_header)
    file_source.write(
"""/* XXX: Code generated by tool unicode_dbl_width.py */
#include "private.h"

#include <Elementary.h>
#include "termpty.h"
#include "termptydbl.h"
""")
    gen_ambigous(ranges, file_source)
    gen_wide(ranges, file_source)


def main(argv=None):
    """Command-line entry point: XML in, header and source files out."""
    parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
    parser.add_argument('xml', type=argparse.FileType('r'))
    parser.add_argument('header', type=argparse.FileType('w'))
    parser.add_argument('source', type=argparse.FileType('w'))

    args = parser.parse_args(argv)

    # Close (and thereby flush) all three files deterministically.
    with args.xml, args.header, args.source:
        ranges = get_ranges(args.xml, True)
        gen_c(ranges, args.header, args.source)


if __name__ == "__main__":
    main()