Changeset: e3930423f7df for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e3930423f7df
Added Files:
        misc/bash/uniwidthtab.sh
Modified Files:
        common/utils/mutf8.c
Branch: default
Log Message:

Add script to create charwidth table.


diffs (73 lines):

diff --git a/common/utils/mutf8.c b/common/utils/mutf8.c
--- a/common/utils/mutf8.c
+++ b/common/utils/mutf8.c
@@ -19,6 +19,7 @@ struct interval {
        int width;
 };
 
+/* this table was created using the script uniwidthtab.sh */
 static const struct interval intervals[] = {
        /* sorted list of non-overlapping ranges:
         * ranges with width==0 represent all codepoints with
diff --git a/misc/bash/uniwidthtab.sh b/misc/bash/uniwidthtab.sh
new file mode 100755
--- /dev/null
+++ b/misc/bash/uniwidthtab.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0.  If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 2024 MonetDB Foundation;
+# Copyright August 2008 - 2023 MonetDB B.V.;
+# Copyright 1997 - July 2008 CWI.
+
+{
+    sed -n -e '/# Mn/d' \
+       -e 's/^\([0-9A-F][0-9A-F]*\)\.\.\([0-9A-F][0-9A-F]*\) *; *[FW].*/0x\1 
0x\2/p' \
+       -e 's/^\([0-9A-F][0-9A-F]*\) *; [FW].*/0x\1 0x\1/p' \
+       < /usr/share/unicode/ucd/EastAsianWidth.txt | {
+       while read line; do
+           line=($line)
+           f=$((${line[0]%}))
+           l=$((${line[1]%}))
+           if [[ -n $prevl ]]; then
+               if (($prevl+1 == $f)); then
+                   prevl=$l
+               else
+                   printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+                   prevf=$f
+                   prevl=$l
+               fi
+           else
+               prevf=$f
+               prevl=$l
+           fi
+       done
+       printf '\t{ 0x%05X, 0x%05X, 2 },\n' $prevf $prevl
+    }
+
+    sed -n '/^00AD/d;s/^\([0-9A-F][0-9A-F]*\);[^;]*;\(Me\|Mn\|Cf\);.*/0x\1/p' \
+       < /usr/share/unicode/ucd/UnicodeData.txt | {
+       while read line; do
+           u=$(($line))
+           if [[ -n $prevf ]]; then
+               if (($prevl+1 == $u)); then
+                   prevl=$u
+               else
+                   printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+                   prevf=$u
+                   prevl=$u
+               fi
+           else
+               prevf=$u
+               prevl=$u
+           fi
+       done
+       printf '\t{ 0x%05X, 0x%05X, 0 },\n' $prevf $prevl
+    }
+} | sort | sed 's/0x0\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\)/0x\1/g'
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to