billiob pushed a commit to branch master.

http://git.enlightenment.org/apps/terminology.git/commit/?id=8c970b88047882fe95907c06b08ff31eb2a6cad3

commit 8c970b88047882fe95907c06b08ff31eb2a6cad3
Author: Boris Faure <bill...@gmail.com>
Date:   Sun Jul 5 21:40:42 2020 +0200

    termptydbl: generate the exact double width test
    
    This is based on Unicode 13.0:
    https://www.unicode.org/reports/tr44/
    
    Code is generated by tools/unicode_dbl_width.py
    
    I'm using switch-case with fall-through, as I've found it to be the best approach:
    https://github.com/billiob/terminology/commit/f46d550a8b5a8fba1522796c5c7f6a0672070e73
---
 src/bin/termptydbl.c       | 676 ++++++++++++++++++++-------------------------
 src/bin/termptydbl.h       |  25 +-
 tools/unicode_dbl_width.py | 190 +++++++++++++
 3 files changed, 494 insertions(+), 397 deletions(-)

diff --git a/src/bin/termptydbl.c b/src/bin/termptydbl.c
index a4362d7..691c062 100644
--- a/src/bin/termptydbl.c
+++ b/src/bin/termptydbl.c
@@ -1,396 +1,308 @@
+/* XXX: Code generated by tool unicode_dbl_width.py */
 #include "private.h"
 
 #include <Elementary.h>
 #include "termpty.h"
 #include "termptydbl.h"
 
+__attribute__((const))
 Eina_Bool
-_termpty_is_dblwidth_slow_get(const Termpty *ty, int g)
+_termpty_is_ambigous_wide(Eina_Unicode g)
 {
-   // check for east asian full-width (F), half-width (H), wide (W),
-   // narrow (Na) or ambiguous (A) codepoints
-   // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+    switch (g)
+      {
+       case 0xA1: EINA_FALLTHROUGH;
+       case 0xA4: EINA_FALLTHROUGH;
+       case 0xA7 ... 0xAA: EINA_FALLTHROUGH;
+       case 0xAD ... 0xAE: EINA_FALLTHROUGH;
+       case 0xB0 ... 0xB4: EINA_FALLTHROUGH;
+       case 0xB6 ... 0xBA: EINA_FALLTHROUGH;
+       case 0xBC ... 0xBF: EINA_FALLTHROUGH;
+       case 0xC6: EINA_FALLTHROUGH;
+       case 0xD0: EINA_FALLTHROUGH;
+       case 0xD7 ... 0xD8: EINA_FALLTHROUGH;
+       case 0xDE ... 0xE1: EINA_FALLTHROUGH;
+       case 0xE6: EINA_FALLTHROUGH;
+       case 0xE8 ... 0xEA: EINA_FALLTHROUGH;
+       case 0xEC ... 0xED: EINA_FALLTHROUGH;
+       case 0xF0: EINA_FALLTHROUGH;
+       case 0xF2 ... 0xF3: EINA_FALLTHROUGH;
+       case 0xF7 ... 0xFA: EINA_FALLTHROUGH;
+       case 0xFC: EINA_FALLTHROUGH;
+       case 0xFE: EINA_FALLTHROUGH;
+       case 0x101: EINA_FALLTHROUGH;
+       case 0x111: EINA_FALLTHROUGH;
+       case 0x113: EINA_FALLTHROUGH;
+       case 0x11B: EINA_FALLTHROUGH;
+       case 0x126 ... 0x127: EINA_FALLTHROUGH;
+       case 0x12B: EINA_FALLTHROUGH;
+       case 0x131 ... 0x133: EINA_FALLTHROUGH;
+       case 0x138: EINA_FALLTHROUGH;
+       case 0x13F ... 0x142: EINA_FALLTHROUGH;
+       case 0x144: EINA_FALLTHROUGH;
+       case 0x148 ... 0x14B: EINA_FALLTHROUGH;
+       case 0x14D: EINA_FALLTHROUGH;
+       case 0x152 ... 0x153: EINA_FALLTHROUGH;
+       case 0x166 ... 0x167: EINA_FALLTHROUGH;
+       case 0x16B: EINA_FALLTHROUGH;
+       case 0x1CE: EINA_FALLTHROUGH;
+       case 0x1D0: EINA_FALLTHROUGH;
+       case 0x1D2: EINA_FALLTHROUGH;
+       case 0x1D4: EINA_FALLTHROUGH;
+       case 0x1D6: EINA_FALLTHROUGH;
+       case 0x1D8: EINA_FALLTHROUGH;
+       case 0x1DA: EINA_FALLTHROUGH;
+       case 0x1DC: EINA_FALLTHROUGH;
+       case 0x251: EINA_FALLTHROUGH;
+       case 0x261: EINA_FALLTHROUGH;
+       case 0x2C4: EINA_FALLTHROUGH;
+       case 0x2C7: EINA_FALLTHROUGH;
+       case 0x2C9 ... 0x2CB: EINA_FALLTHROUGH;
+       case 0x2CD: EINA_FALLTHROUGH;
+       case 0x2D0: EINA_FALLTHROUGH;
+       case 0x2D8 ... 0x2DB: EINA_FALLTHROUGH;
+       case 0x2DD: EINA_FALLTHROUGH;
+       case 0x2DF: EINA_FALLTHROUGH;
+       case 0x300 ... 0x36F: EINA_FALLTHROUGH;
+       case 0x391 ... 0x3A9: EINA_FALLTHROUGH;
+       case 0x3B1 ... 0x3C1: EINA_FALLTHROUGH;
+       case 0x3C3 ... 0x3C9: EINA_FALLTHROUGH;
+       case 0x401: EINA_FALLTHROUGH;
+       case 0x410 ... 0x44F: EINA_FALLTHROUGH;
+       case 0x451: EINA_FALLTHROUGH;
+       case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
+       case 0x2010: EINA_FALLTHROUGH;
+       case 0x2013 ... 0x2016: EINA_FALLTHROUGH;
+       case 0x2018 ... 0x2019: EINA_FALLTHROUGH;
+       case 0x201C ... 0x201D: EINA_FALLTHROUGH;
+       case 0x2020 ... 0x2022: EINA_FALLTHROUGH;
+       case 0x2024 ... 0x2027: EINA_FALLTHROUGH;
+       case 0x2030: EINA_FALLTHROUGH;
+       case 0x2032 ... 0x2033: EINA_FALLTHROUGH;
+       case 0x2035: EINA_FALLTHROUGH;
+       case 0x203B ... 0x203C: EINA_FALLTHROUGH;
+       case 0x203E: EINA_FALLTHROUGH;
+       case 0x2049: EINA_FALLTHROUGH;
+       case 0x2074: EINA_FALLTHROUGH;
+       case 0x207F: EINA_FALLTHROUGH;
+       case 0x2081 ... 0x2084: EINA_FALLTHROUGH;
+       case 0x20AC: EINA_FALLTHROUGH;
+       case 0x2103: EINA_FALLTHROUGH;
+       case 0x2105: EINA_FALLTHROUGH;
+       case 0x2109: EINA_FALLTHROUGH;
+       case 0x2113: EINA_FALLTHROUGH;
+       case 0x2116: EINA_FALLTHROUGH;
+       case 0x2121 ... 0x2122: EINA_FALLTHROUGH;
+       case 0x2126: EINA_FALLTHROUGH;
+       case 0x212B: EINA_FALLTHROUGH;
+       case 0x2139: EINA_FALLTHROUGH;
+       case 0x2153 ... 0x2154: EINA_FALLTHROUGH;
+       case 0x215B ... 0x215E: EINA_FALLTHROUGH;
+       case 0x2160 ... 0x216B: EINA_FALLTHROUGH;
+       case 0x2170 ... 0x2179: EINA_FALLTHROUGH;
+       case 0x2189: EINA_FALLTHROUGH;
+       case 0x2190 ... 0x2199: EINA_FALLTHROUGH;
+       case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
+       case 0x21B8 ... 0x21B9: EINA_FALLTHROUGH;
+       case 0x21D2: EINA_FALLTHROUGH;
+       case 0x21D4: EINA_FALLTHROUGH;
+       case 0x21E7: EINA_FALLTHROUGH;
+       case 0x2200: EINA_FALLTHROUGH;
+       case 0x2202 ... 0x2203: EINA_FALLTHROUGH;
+       case 0x2207 ... 0x2208: EINA_FALLTHROUGH;
+       case 0x220B: EINA_FALLTHROUGH;
+       case 0x220F: EINA_FALLTHROUGH;
+       case 0x2211: EINA_FALLTHROUGH;
+       case 0x2215: EINA_FALLTHROUGH;
+       case 0x221A: EINA_FALLTHROUGH;
+       case 0x221D ... 0x2220: EINA_FALLTHROUGH;
+       case 0x2223: EINA_FALLTHROUGH;
+       case 0x2225: EINA_FALLTHROUGH;
+       case 0x2227 ... 0x222C: EINA_FALLTHROUGH;
+       case 0x222E: EINA_FALLTHROUGH;
+       case 0x2234 ... 0x2237: EINA_FALLTHROUGH;
+       case 0x223C ... 0x223D: EINA_FALLTHROUGH;
+       case 0x2248: EINA_FALLTHROUGH;
+       case 0x224C: EINA_FALLTHROUGH;
+       case 0x2252: EINA_FALLTHROUGH;
+       case 0x2260 ... 0x2261: EINA_FALLTHROUGH;
+       case 0x2264 ... 0x2267: EINA_FALLTHROUGH;
+       case 0x226A ... 0x226B: EINA_FALLTHROUGH;
+       case 0x226E ... 0x226F: EINA_FALLTHROUGH;
+       case 0x2282 ... 0x2283: EINA_FALLTHROUGH;
+       case 0x2286 ... 0x2287: EINA_FALLTHROUGH;
+       case 0x2295: EINA_FALLTHROUGH;
+       case 0x2299: EINA_FALLTHROUGH;
+       case 0x22A5: EINA_FALLTHROUGH;
+       case 0x22BF: EINA_FALLTHROUGH;
+       case 0x2312: EINA_FALLTHROUGH;
+       case 0x231A ... 0x231B: EINA_FALLTHROUGH;
+       case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
+       case 0x2388: EINA_FALLTHROUGH;
+       case 0x23CF: EINA_FALLTHROUGH;
+       case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
+       case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
+       case 0x2460 ... 0x24E9: EINA_FALLTHROUGH;
+       case 0x24EB ... 0x254B: EINA_FALLTHROUGH;
+       case 0x2550 ... 0x2573: EINA_FALLTHROUGH;
+       case 0x2580 ... 0x258F: EINA_FALLTHROUGH;
+       case 0x2592 ... 0x2595: EINA_FALLTHROUGH;
+       case 0x25A0 ... 0x25A1: EINA_FALLTHROUGH;
+       case 0x25A3 ... 0x25AB: EINA_FALLTHROUGH;
+       case 0x25B2 ... 0x25B3: EINA_FALLTHROUGH;
+       case 0x25B6 ... 0x25B7: EINA_FALLTHROUGH;
+       case 0x25BC ... 0x25BD: EINA_FALLTHROUGH;
+       case 0x25C0 ... 0x25C1: EINA_FALLTHROUGH;
+       case 0x25C6 ... 0x25C8: EINA_FALLTHROUGH;
+       case 0x25CB: EINA_FALLTHROUGH;
+       case 0x25CE ... 0x25D1: EINA_FALLTHROUGH;
+       case 0x25E2 ... 0x25E5: EINA_FALLTHROUGH;
+       case 0x25EF: EINA_FALLTHROUGH;
+       case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
+       case 0x2600 ... 0x2612: EINA_FALLTHROUGH;
+       case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
+       case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
+       case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
+       case 0x2714: EINA_FALLTHROUGH;
+       case 0x2716: EINA_FALLTHROUGH;
+       case 0x271D: EINA_FALLTHROUGH;
+       case 0x2721: EINA_FALLTHROUGH;
+       case 0x2728: EINA_FALLTHROUGH;
+       case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
+       case 0x273D: EINA_FALLTHROUGH;
+       case 0x2744: EINA_FALLTHROUGH;
+       case 0x2747: EINA_FALLTHROUGH;
+       case 0x274C: EINA_FALLTHROUGH;
+       case 0x274E: EINA_FALLTHROUGH;
+       case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
+       case 0x2757: EINA_FALLTHROUGH;
+       case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
+       case 0x2776 ... 0x277F: EINA_FALLTHROUGH;
+       case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
+       case 0x27A1: EINA_FALLTHROUGH;
+       case 0x27B0: EINA_FALLTHROUGH;
+       case 0x27BF: EINA_FALLTHROUGH;
+       case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
+       case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
+       case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
+       case 0x2B50: EINA_FALLTHROUGH;
+       case 0x2B55 ... 0x2B59: EINA_FALLTHROUGH;
+       case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
+       case 0x3041 ... 0x4DBF: EINA_FALLTHROUGH;
+       case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
+       case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
+       case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
+       case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
+       case 0xFE00 ... 0xFE19: EINA_FALLTHROUGH;
+       case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
+       case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
+       case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
+       case 0xFFFD: EINA_FALLTHROUGH;
+       case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
+       case 0x1F000 ... 0x1F10A: EINA_FALLTHROUGH;
+       case 0x1F10D ... 0x1F12D: EINA_FALLTHROUGH;
+       case 0x1F12F ... 0x1F169: EINA_FALLTHROUGH;
+       case 0x1F16C ... 0x1F1AD: EINA_FALLTHROUGH;
+       case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
+       case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
+       case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
+       case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
+       case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
+       case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
+       case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
+       case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
+       case 0x20000 ... 0x3134A: EINA_FALLTHROUGH;
+       case 0xE0100 ... 0xE01EF:
 
-   // emoji should be double since unicode 9 (was single before):
-   // http://www.unicode.org/emoji/charts/full-emoji-list.html
-   //
-   // [ 0x0080 ->  0x02AF] !!! handle carefully **
-   // [ 0x1DC0 ->  0x1DFF]
-   // [ 0x1E00 ->  0x1EFF]
-   // [ 0x2000 ->  0x209F] !!! handle carefully **
-   // [ 0x20D0 ->  0x214F]
-   // [ 0x2190 ->  0x23FF]
-   // [ 0x2460 ->  0x24FF]
-   // [ 0x2600 ->  0x262F]
-   // [ 0x2638 ->  0x27EF]
-   // [ 0x2900 ->  0x29FF]
-   // [ 0x2B00 ->  0x2BFF] !!! unicode only 2B55 2B50
-   // [ 0x2C60 ->  0x2C7F]
-   // [ 0x2E00 ->  0x2E7F]
-   // [ 0x3000 ->  0x303F] !! not 33D1
-   // [ 0xA490 ->  0xA4CF]
-   // [0x1F000 -> 0x1F02F]
-   // [0x1F0A0 -> 0x1F0FF]
-   // [0x1F100 -> 0x1F64F]
-   // [0x1F680 -> 0x1F6FF]
-   // [0x1F910 -> 0x1F96B]
-   // [0x1F980 -> 0x1F9E0]
-   //
-   // ** these range include these odities:
-   // © (copyright)                00A9
-   // ® (registered)               00AE
-   // ‼ (double exclamation)       203C
-   // ⁉ (exclamation questionmark) 2049
-   // which should be single width, so ignore them
-
-   // (W)
-   // optimization: only look into more detailed ranges if within larger block
-   if ((g >= 0x1100) && (g <= 0x3FFFD))
-     {
-        if (
-            // 1XXX
-            ((g >= 0x1100) && (g <= 0x115f)) || // Hangul Jamo
-            // 2XXX
-            ((g == 0x2329) || (g == 0x232a)) || // <>
-            ((g >= 0x2e80) && (g <= 0x2ffb)) || // Radical supplements
-            // 3XXX -> A4C6
-            ((g >= 0x3001) && (g <= 0x303f)) || // CJK Symbols and Punctuation
-            ((g >= 0x3041) && (g <= 0x3247)) || // Hiragana, Katakana,
-                                                // Bopomoto, Hangul
-                                                // Compatibility Jamo, Kanbun,
-                                                // Bopomofo Extended, CJK
-                                                // Strokes, Katana Phonetic
-                                                // Extensions, Enclosed CJK
-                                                // Letters and Months
-            ((g >= 0x3250) && (g <= 0x33D0)) || // Enclosed CJK Letters and
-                                                // Months, CJK Compatibility
-            // [ symbols used by "powerline" ]
-            ((g >= 0x33D2) && (g <= 0x4dbf)) || // CJK Compatibility, CJK
-                                                // Unified Ideographs
-                                                // Extension A, Yijing
-                                                // Hexagram Symbols
-            ((g >= 0x4e00) && (g <= 0xa4c6)) || // CJK Unified Ideographs,
-                                                // Yi Syllables, Yi Radicals
-            // aXXX
-            ((g >= 0xa960) && (g <= 0xa97c)) || // Hangul Jamo Extended A
-            ((g >= 0xac00) && (g <= 0xd7a3)) || // Hangul Syllables
-            // fXXX
-            ((g >= 0xf900) && (g <= 0xfaff)) || // CJK Compatibility Ideographs
-            ((g >= 0xfe10) && (g <= 0xfe19)) || // Vertical Forms
-            ((g >= 0xfe30) && (g <= 0xfe6b)) || // CJK Compatibility Forms,
-                                                // Small Forms Variant
-            // 1XXXX
-            ((g >= 0x1b000) && (g <= 0x1b11e)) || // Kana Supplement, Kana
-                                                  // Extended A
-            ((g >= 0x1b170) && (g <= 0x1b2fb)) || // Nushu
-            ((g >= 0x1f200) && (g <= 0x1f202)) || // Enclosed Ideographic
-                                                  // Supplement
-            ((g >= 0x1f210) && (g <= 0x1f265)) || // Enclosed Ideographic
-                                                  // Supplement
-            // 2XXXX
-            ((g >= 0x20000) && (g <= 0x2fffd)) || // CJK
-            // 3XXXX
-            ((g >= 0x30000) && (g <= 0x3fffd)))
         return EINA_TRUE;
-     }
-   if (ty->config->emoji_dbl_width && ((g >= 0x1f004) && (g <= 0x1f9c0)))
-     {
-        /* Taken from
-         * https://github.com/ridiculousfish/widecharwidth/blob/master/widechar_width.h
-         */
-        const uint16_t u = (g & 0xfff);
-        if ( (u == 0x004) ||
-             (u == 0x0cf) ||
-             ((u >= 0x170) && (u <= 0x171)) ||
-             ((u >= 0x17e) && (u <= 0x17f)) ||
-             (u == 0x18e) ||
-             ((u >= 0x191) && (u <= 0x19a)) ||
-             ((u >= 0x1e6) && (u <= 0x1ff)) ||
-             ((u >= 0x201) && (u < 0x202)) ||
-             (u == 0x21a) ||
-             (u == 0x22f) ||
-             ((u >= 0x232) && (u <= 0x23a)) ||
-             ((u >= 0x250) && (u <= 0x251)) ||
-             ((u >= 0x300) && (u <= 0x321)) ||
-             ((u >= 0x324) && (u <= 0x393)) ||
-             ((u >= 0x396) && (u <= 0x397)) ||
-             ((u >= 0x399) && (u <= 0x39B)) ||
-             ((u >= 0x39E) && (u <= 0x3F0)) ||
-             ((u >= 0x3F3) && (u <= 0x3F5)) ||
-             ((u >= 0x3F7) && (u <= 0x4FD)) ||
-             ((u >= 0x4FF) && (u <= 0x53D)) ||
-             ((u >= 0x549) && (u <= 0x54E)) ||
-             ((u >= 0x550) && (u <= 0x567)) ||
-             ((u >= 0x56F) && (u <= 0x570)) ||
-             ((u >= 0x573) && (u <= 0x579)) ||
-             (u == 0x587) ||
-             ((u >= 0x58A) && (u <= 0x58D)) ||
-             (u == 0x590) ||
-             ((u >= 0x595) && (u <= 0x596)) ||
-             (u == 0x5A5) ||
-             (u == 0x5A8) ||
-             ((u >= 0x5B1) && (u <= 0x5B2)) ||
-             (u == 0x5BC) ||
-             ((u >= 0x5C2) && (u <= 0x5C4)) ||
-             ((u >= 0x5D1) && (u <= 0x5D3)) ||
-             ((u >= 0x5DC) && (u <= 0x5DE)) ||
-             (u == 0x5E1) ||
-             (u == 0x5E3) ||
-             (u == 0x5E8) ||
-             (u == 0x5EF) ||
-             (u == 0x5F3) ||
-             ((u >= 0x5FA) && (u <= 0x64F)) ||
-             ((u >= 0x680) && (u <= 0x6C5)) ||
-             ((u >= 0x6CB) && (u <= 0x6D0)) ||
-             ((u >= 0x6E0) && (u <= 0x6E5)) ||
-             (u == 0x6E9) ||
-             ((u >= 0x6EB) && (u <= 0x6EC)) ||
-             (u == 0x6F0) ||
-             (u == 0x6F3) ||
-             ((u >= 0x910) && (u <= 0x918)) ||
-             ((u >= 0x980) && (u <= 0x984)) ||
-             (u == 0x9C0)
-             )
-          return EINA_TRUE;
-     }
+    }
+   return EINA_FALSE;
+}
 
-   // FIXME: can optimize by breaking into tree and ranges
-   // (A)
-   if (ty->termstate.cjk_ambiguous_wide)
-     {
-        // grep ';A #' EastAsianWidth.txt | wc -l
-        // :(
-        if (
-           // aX
-           (((g >> 4) == 0xa) &&
-            (
-               (g == 0x00a1) ||
-               (g == 0x00a4) ||
-               ((g >= 0x00a7) && (g <= 0x00a8)) ||
-               (g == 0x00aa) ||
-               ((g >= 0x00ad) && (g <= 0x00ae)))) ||
-           // bX
-           (((g >> 4) == 0xb) &&
-            (((g >= 0x00b0) && (g <= 0x00bf)))) ||
-           // cX
-           (((g >> 4) == 0xc) &&
-            ((g == 0x00c6))) ||
-           // dX
-           (((g >> 4) == 0xd) &&
-            (
-               (g == 0x00d0) ||
-               ((g >= 0x00d7) && (g <= 0x00d8)) ||
-               ((g >= 0x00de) && (g <= 0x00df)))) ||
-           // eX
-           (((g >> 4) == 0xe) &&
-            (
-               (g == 0x00e0) ||
-               (g == 0x00e1) ||
-               (g == 0x00e6) ||
-               ((g >= 0x00e8) && (g <= 0x00e9)) ||
-               (g == 0x00ea) ||
-               ((g >= 0x00ec) && (g <= 0x00ed)))) ||
-           // fX
-           (((g >> 4) == 0xf) &&
-            (
-               (g == 0x00f0) ||
-               ((g >= 0x00f2) && (g <= 0x00f3)) ||
-               ((g >= 0x00f7) && (g <= 0x00f9)) ||
-               (g == 0x00fa) ||
-               (g == 0x00fc) ||
-               (g == 0x00fe))) ||
-           // 1XX
-           (((g >> 8) == 0x1) &&
-            (
-               (g == 0x0101) ||
-               (g == 0x0111) ||
-               (g == 0x0113) ||
-               (g == 0x011b) ||
-               ((g >= 0x0126) && (g <= 0x0127)) ||
-               (g == 0x012b) ||
-               ((g >= 0x0131) && (g <= 0x0133)) ||
-               (g == 0x0138) ||
-               ((g >= 0x013f) && (g <= 0x0142)) ||
-               (g == 0x0144) ||
-               ((g >= 0x0148) && (g <= 0x014b)) ||
-               (g == 0x014d) ||
-               ((g >= 0x0152) && (g <= 0x0153)) ||
-               ((g >= 0x0166) && (g <= 0x0167)) ||
-               (g == 0x016b) ||
-               (g == 0x01ce) ||
-               (g == 0x01d0) ||
-               (g == 0x01d2) ||
-               (g == 0x01d4) ||
-               (g == 0x01d6) ||
-               (g == 0x01d8) ||
-               (g == 0x01da) ||
-               (g == 0x01dc))) ||
-               // 2XX
-               (((g >> 8) == 0x2) &&
-                (
-                   (g == 0x0251) ||
-                   (g == 0x0261) ||
-                   (g == 0x02c4) ||
-                   (g == 0x02c7) ||
-                   (g == 0x02c9) ||
-                   ((g >= 0x02ca) && (g <= 0x02cb)) ||
-                   (g == 0x02cd) ||
-                   (g == 0x02d0) ||
-                   ((g >= 0x02d8) && (g <= 0x02d9)) ||
-                   ((g >= 0x02da) && (g <= 0x02db)) ||
-                   (g == 0x02dd) ||
-                   (g == 0x02df))) ||
-               // 3XX
-               (((g >> 8) == 0x3) &&
-                (
-                   ((g >= 0x0300) && (g <= 0x036f)) ||
-                   ((g >= 0x0391) && (g <= 0x03c9)))) ||
-               // 4XX
-               (((g >> 8) == 0x4) &&
-                (
-                   (g == 0x0401) ||
-                   ((g >= 0x0410) && (g <= 0x044f)) ||
-                   (g == 0x0451))) ||
-               // 2XXX
-               (((g >> 12) == 0x2) &&
-                ((((g >> 8) == 0x20) &&
-                  (
-                     (g == 0x2010) ||
-                     ((g >= 0x2013) && (g <= 0x2016)) ||
-                     ((g >= 0x2018) && (g <= 0x2019)) ||
-                     (g == 0x201c) ||
-                     (g == 0x201d) ||
-                     ((g >= 0x2020) && (g <= 0x2022)) ||
-                     ((g >= 0x2024) && (g <= 0x2027)) ||
-                     (g == 0x2030) ||
-                     ((g >= 0x2032) && (g <= 0x2033)) ||
-                     (g == 0x2035) ||
-                     (g == 0x203b) ||
-                     (g == 0x203e) ||
-                     (g == 0x2074) ||
-                     (g == 0x207f) ||
-                     ((g >= 0x2081) && (g <= 0x2084)) ||
-                     (g == 0x20ac))) ||
-                 (((g >> 8) == 0x21) &&
-                  (
-                     (g == 0x2103) ||
-                     (g == 0x2105) ||
-                     (g == 0x2109) ||
-                     (g == 0x2113) ||
-                     (g == 0x2116) ||
-                     ((g >= 0x2121) && (g <= 0x2122)) ||
-                     (g == 0x2126) ||
-                     (g == 0x212b) ||
-                     ((g >= 0x2153) && (g <= 0x2154)) ||
-                     ((g >= 0x215b) && (g <= 0x215e)) ||
-                     ((g >= 0x2160) && (g <= 0x216b)) ||
-                     ((g >= 0x2170) && (g <= 0x2179)) ||
-                     ((g >= 0x2189) && (g <= 0x2199)) ||
-                     ((g >= 0x21b8) && (g <= 0x21b9)) ||
-                     (g == 0x21d2) ||
-                     (g == 0x21d4) ||
-                     (g == 0x21e7))) ||
-                 (((g >> 8) == 0x22) &&
-                  (
-                     (g == 0x2200) ||
-                     ((g >= 0x2202) && (g <= 0x2203)) ||
-                     ((g >= 0x2207) && (g <= 0x2208)) ||
-                     (g == 0x220b) ||
-                     (g == 0x220f) ||
-                     (g == 0x2211) ||
-                     (g == 0x2215) ||
-                     (g == 0x221a) ||
-                     ((g >= 0x221d) && (g <= 0x221f)) ||
-                     (g == 0x2220) ||
-                     (g == 0x2223) ||
-                     (g == 0x2225) ||
-                     ((g >= 0x2227) && (g <= 0x222e)) ||
-                     ((g >= 0x2234) && (g <= 0x2237)) ||
-                     ((g >= 0x223c) && (g <= 0x223d)) ||
-                     (g == 0x2248) ||
-                     (g == 0x224c) ||
-                     (g == 0x2252) ||
-                     ((g >= 0x2260) && (g <= 0x2261)) ||
-                     ((g >= 0x2264) && (g <= 0x2267)) ||
-                     ((g >= 0x226a) && (g <= 0x226b)) ||
-                     ((g >= 0x226e) && (g <= 0x226f)) ||
-                     ((g >= 0x2282) && (g <= 0x2283)) ||
-                     ((g >= 0x2286) && (g <= 0x2287)) ||
-                     (g == 0x2295) ||
-                     (g == 0x2299) ||
-                     (g == 0x22a5) ||
-                     (g == 0x22bf))) ||
-                     (((g >> 8) == 0x23) &&
-                      ((g == 0x2312))) ||
-                     ((((g >> 8) == 0x24) || ((g >> 8) == 0x25)) &&
-                      (((g >= 0x2460) && (g <= 0x2595)))) ||
-                     (((g >> 8) == 0x25) &&
-                      (
-                         ((g >= 0x25a0) && (g <= 0x25bd)) ||
-                         ((g >= 0x25c0) && (g <= 0x25c1)) ||
-                         ((g >= 0x25c6) && (g <= 0x25c7)) ||
-                         (g == 0x25c8) ||
-                         (g == 0x25cb) ||
-                         ((g >= 0x25ce) && (g <= 0x25cf)) ||
-                         ((g >= 0x25d0) && (g <= 0x25d1)) ||
-                         ((g >= 0x25e2) && (g <= 0x25e3)) ||
-                         ((g >= 0x25e4) && (g <= 0x25e5)) ||
-                         (g == 0x25ef))) ||
-                     (((g >> 8) == 0x26) &&
-                      (
-                         ((g >= 0x2605) && (g <= 0x2606)) ||
-                         (g == 0x2609) ||
-                         ((g >= 0x260e) && (g <= 0x260f)) ||
-                         ((g >= 0x2614) && (g <= 0x2615)) ||
-                         (g == 0x261c) ||
-                         (g == 0x261e) ||
-                         (g == 0x2640) ||
-                         (g == 0x2642) ||
-                         ((g >= 0x2660) && (g <= 0x2661)) ||
-                         ((g >= 0x2663) && (g <= 0x2665)) ||
-                         ((g >= 0x2667) && (g <= 0x266a)) ||
-                         ((g >= 0x266c) && (g <= 0x266d)) ||
-                         (g == 0x266f) ||
-                         ((g >= 0x269e) && (g <= 0x269f)) ||
-                         ((g >= 0x26be) && (g <= 0x26bf)) ||
-                         ((g >= 0x26c4) && (g <= 0x26cd)) ||
-                         (g == 0x26cf) ||
-                         ((g >= 0x26d0) && (g <= 0x26e1)) ||
-                         (g == 0x26e3) ||
-                         ((g >= 0x26e8) && (g <= 0x26ff)))) ||
-                         (((g >> 8) == 0x27) &&
-                          (
-                             (g == 0x273d) ||
-                             (g == 0x2757) ||
-                             ((g >= 0x2776) && (g <= 0x277f)))) ||
-                         (((g >> 8) == 0x2b) &&
-                          (((g >= 0x2b55) && (g <= 0x2b59)))))) ||
-                          // 3XXX
-                          (((g >> 12) == 0x3) &&
-                           (((g >= 0x3248) && (g <= 0x324f)))) ||
-                          // fXXX
-                          (((g >> 12) == 0xf) &&
-                           (
-                              ((g >= 0xfe00) && (g <= 0xfe0f)) ||
-                              (g == 0xfffd))) ||
-                          // 1XXXX
-                          (((g >> 16) == 0x1) &&
-                           (
-                              ((g >= 0x1f100) && (g <= 0x1f12d)) ||
-                              ((g >= 0x1f130) && (g <= 0x1f169)) ||
-                              ((g >= 0x1f170) && (g <= 0x1f19a)))) ||
-                          // eXXXX
-                          (((g >> 16) == 0xe) &&
-                           (((g >= 0xe0100) && (g <= 0xe01ef)))) ||
-                          // fXXXX
-                          (((g >> 16) == 0xf) &&
-                           (((g >= 0xf0000) && (g <= 0xffffd)))) ||
-                          // 1XXXXX
-                          (((g >> 24) == 0x1) &&
-                           (((g >= 0x100000) && (g <= 0x10fffd)))))
-                           return EINA_TRUE;
-     }
+__attribute__((const))
+Eina_Bool
+_termpty_is_wide(Eina_Unicode g)
+{
+    switch (g)
+      {
+       case 0xA9: EINA_FALLTHROUGH;
+       case 0xAE: EINA_FALLTHROUGH;
+       case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
+       case 0x203C: EINA_FALLTHROUGH;
+       case 0x2049: EINA_FALLTHROUGH;
+       case 0x2122: EINA_FALLTHROUGH;
+       case 0x2139: EINA_FALLTHROUGH;
+       case 0x2194 ... 0x2199: EINA_FALLTHROUGH;
+       case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
+       case 0x231A ... 0x231B: EINA_FALLTHROUGH;
+       case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
+       case 0x2388: EINA_FALLTHROUGH;
+       case 0x23CF: EINA_FALLTHROUGH;
+       case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
+       case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
+       case 0x24C2: EINA_FALLTHROUGH;
+       case 0x25AA ... 0x25AB: EINA_FALLTHROUGH;
+       case 0x25B6: EINA_FALLTHROUGH;
+       case 0x25C0: EINA_FALLTHROUGH;
+       case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
+       case 0x2600 ... 0x2605: EINA_FALLTHROUGH;
+       case 0x2607 ... 0x2612: EINA_FALLTHROUGH;
+       case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
+       case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
+       case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
+       case 0x2714: EINA_FALLTHROUGH;
+       case 0x2716: EINA_FALLTHROUGH;
+       case 0x271D: EINA_FALLTHROUGH;
+       case 0x2721: EINA_FALLTHROUGH;
+       case 0x2728: EINA_FALLTHROUGH;
+       case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
+       case 0x2744: EINA_FALLTHROUGH;
+       case 0x2747: EINA_FALLTHROUGH;
+       case 0x274C: EINA_FALLTHROUGH;
+       case 0x274E: EINA_FALLTHROUGH;
+       case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
+       case 0x2757: EINA_FALLTHROUGH;
+       case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
+       case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
+       case 0x27A1: EINA_FALLTHROUGH;
+       case 0x27B0: EINA_FALLTHROUGH;
+       case 0x27BF: EINA_FALLTHROUGH;
+       case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
+       case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
+       case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
+       case 0x2B50: EINA_FALLTHROUGH;
+       case 0x2B55: EINA_FALLTHROUGH;
+       case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
+       case 0x3041 ... 0x3247: EINA_FALLTHROUGH;
+       case 0x3250 ... 0x4DBF: EINA_FALLTHROUGH;
+       case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
+       case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
+       case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
+       case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
+       case 0xFE10 ... 0xFE19: EINA_FALLTHROUGH;
+       case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
+       case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
+       case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
+       case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
+       case 0x1F000 ... 0x1F0F5: EINA_FALLTHROUGH;
+       case 0x1F10D ... 0x1F10F: EINA_FALLTHROUGH;
+       case 0x1F12F: EINA_FALLTHROUGH;
+       case 0x1F16C ... 0x1F171: EINA_FALLTHROUGH;
+       case 0x1F17E ... 0x1F17F: EINA_FALLTHROUGH;
+       case 0x1F18E: EINA_FALLTHROUGH;
+       case 0x1F191 ... 0x1F19A: EINA_FALLTHROUGH;
+       case 0x1F1AD: EINA_FALLTHROUGH;
+       case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
+       case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
+       case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
+       case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
+       case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
+       case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
+       case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
+       case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
+       case 0x20000 ... 0x3134A:
 
-   // Na, H -> not checked
+        return EINA_TRUE;
+    }
    return EINA_FALSE;
 }
diff --git a/src/bin/termptydbl.h b/src/bin/termptydbl.h
index 142e16c..ca365b2 100644
--- a/src/bin/termptydbl.h
+++ b/src/bin/termptydbl.h
@@ -1,25 +1,20 @@
+/* XXX: Code generated by tool unicode_dbl_width.py */
 #ifndef _TERMPTY_DBL_H__
 #define _TERMPTY_DBL_H__ 1
 
-Eina_Bool _termpty_is_dblwidth_slow_get(const Termpty *ty, int g);
+Eina_Bool _termpty_is_wide(const Eina_Unicode g);
+Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
 
 static inline Eina_Bool
-_termpty_is_dblwidth_get(const Termpty *ty, int g)
+_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
 {
-   // check for east asian full-width (F), half-width (H), wide (W),
-   // narrow (Na) or ambiguous (A) codepoints
-   // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-
-   // optimize for latin1 non-ambiguous
-   if (g <= 0xa0)
+   /* optimize for latin1 non-ambiguous */
+   if (g <= 0xA0)
      return EINA_FALSE;
-   // (F)
-   if ((g == 0x3000) ||
-       ((g >= 0xff01) && (g <= 0xff60)) ||
-       ((g >= 0xffe0) && (g <= 0xffe6)))
-     return EINA_TRUE;
-
-   return _termpty_is_dblwidth_slow_get(ty, g);
+   if (!ty->termstate.cjk_ambiguous_wide)
+     return _termpty_is_wide(g);
+   else
+     return _termpty_is_ambigous_wide(g);
 }
 
 #endif
diff --git a/tools/unicode_dbl_width.py b/tools/unicode_dbl_width.py
new file mode 100755
index 0000000..2457834
--- /dev/null
+++ b/tools/unicode_dbl_width.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+
+"""
+Generate src/bin/termptydbl.{c,h} from the Unicode Character Database.
+Expects ucd.all.flat.xml, extracted from
+https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
+"""
+
+import argparse
+from collections import namedtuple
+import xml.etree.ElementTree as ET
+
+Range = namedtuple('range', ['width', 'start', 'end'])
+
+def get_ranges(xmlfile, emoji_as_wide):
+    tree = ET.parse(xmlfile)
+    root = tree.getroot()
+    repertoire = 
root.find("{http://www.unicode.org/ns/2003/ucd/1.0}repertoire";)
+    chars = repertoire.findall("{http://www.unicode.org/ns/2003/ucd/1.0}char";)
+
+    ranges = []
+    range = Range('N', 0, 0)
+    for c in chars:
+        ea = c.get('ea')
+        if ea in ('Na', 'H'):
+            ea = 'N'
+        if ea in ('F'):
+            ea = 'W'
+        assert ea in ('N', 'A', 'W')
+        cp = c.get('cp')
+        if not cp:
+            continue
+        if emoji_as_wide:
+            emoji = c.get('ExtPict')
+            if emoji == 'Y':
+                ea = 'W'
+
+        cp = int(cp, 16)
+        if ea != range[0]:
+            ranges.append(range)
+            range = Range(ea, cp, cp)
+        else:
+            range = range._replace(end=cp)
+
+    ranges.append(range)
+
+    return ranges
+
+def merge_ranges(ranges, is_same_width):
+    res = []
+    range = ranges[0]
+    for r in ranges:
+        if is_same_width(r, range):
+            range = range._replace(end=r.end)
+        else:
+            res.append(range)
+            range = r
+    res.append(range)
+    return res
+
+def skip_ranges(ranges, width_skipped):
+    res = []
+    for r in ranges:
+        if r.width not in width_skipped:
+            res.append(r)
+    return res
+
+def gen_header(range, file_header):
+    file_header.write(
+"""/* XXX: Code generated by tool unicode_dbl_width.py */
+#ifndef _TERMPTY_DBL_H__
+#define _TERMPTY_DBL_H__ 1
+
+Eina_Bool _termpty_is_wide(const Eina_Unicode g);
+Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
+
+static inline Eina_Bool
+_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
+{
+   /* optimize for latin1 non-ambiguous */
+""")
+    file_header.write(f"   if (g <= 0x{range.end:X})")
+    file_header.write(
+"""
+     return EINA_FALSE;
+   if (!ty->termstate.cjk_ambiguous_wide)
+     return _termpty_is_wide(g);
+   else
+     return _termpty_is_ambigous_wide(g);
+}
+
+#endif
+""")
+
+def gen_ambigous(ranges, file_source):
+    file_source.write(
+"""
+__attribute__((const))
+Eina_Bool
+_termpty_is_ambigous_wide(Eina_Unicode g)
+{
+    switch (g)
+      {
+""")
+    def is_same_width(r1, r2):
+        if r1.width == 'N':
+            return r2.width == 'N'
+        else:
+            return r2.width in ('A', 'W')
+    ranges = merge_ranges(ranges[1:], is_same_width)
+    ranges = skip_ranges(ranges, ('N',))
+
+    fallthrough = " EINA_FALLTHROUGH;"
+    for idx, r in enumerate(ranges):
+        if r.width == 'N':
+            continue;
+        if idx == len(ranges) -1:
+            fallthrough = ""
+        if r.start == r.end:
+            file_source.write(f"       case 0x{r.start:X}:{fallthrough}\n")
+        else:
+            file_source.write(f"       case 0x{r.start:X} ... 
0x{r.end:X}:{fallthrough}\n")
+
+    file_source.write(
+"""
+        return EINA_TRUE;
+    }
+   return EINA_FALSE;
+}
+""")
+
+def gen_wide(ranges, file_source):
+    file_source.write(
+"""
+__attribute__((const))
+Eina_Bool
+_termpty_is_wide(Eina_Unicode g)
+{
+    switch (g)
+      {
+""")
+    def is_same_width(r1, r2):
+        if r1.width in ('N', 'A'):
+            return r2.width in ('N', 'A')
+        else:
+            return r2.width == 'W'
+    ranges = merge_ranges(ranges[1:], is_same_width)
+    ranges = skip_ranges(ranges, ('N', 'A'))
+    fallthrough = " EINA_FALLTHROUGH;"
+    for idx, r in enumerate(ranges):
+        if r.width in ('N', 'A'):
+            continue;
+        if idx == len(ranges) -1:
+            fallthrough = ""
+        if r.start == r.end:
+            file_source.write(f"       case 0x{r.start:X}:{fallthrough}\n")
+        else:
+            file_source.write(f"       case 0x{r.start:X} ... 
0x{r.end:X}:{fallthrough}\n")
+
+    file_source.write(
+"""
+        return EINA_TRUE;
+    }
+   return EINA_FALSE;
+}
+""")
+
+
+def gen_c(ranges, file_header, file_source):
+    gen_header(ranges[0], file_header)
+    file_source.write(
+"""/* XXX: Code generated by tool unicode_dbl_width.py */
+#include "private.h"
+
+#include <Elementary.h>
+#include "termpty.h"
+#include "termptydbl.h"
+""")
+    gen_ambigous(ranges, file_source)
+    gen_wide(ranges, file_source)
+
+parser = argparse.ArgumentParser(description='Generate code handling different 
widths of unicode codepoints.')
+parser.add_argument('xml', type=argparse.FileType('r'))
+parser.add_argument('header', type=argparse.FileType('w'))
+parser.add_argument('source', type=argparse.FileType('w'))
+
+args = parser.parse_args()
+
+ranges = get_ranges(args.xml, True)
+gen_c(ranges, args.header, args.source)

-- 


Reply via email to