tasn pushed a commit to branch master. http://git.enlightenment.org/core/efl.git/commit/?id=abb7310506ce825404788b7674f2bf3961c9df45
commit abb7310506ce825404788b7674f2bf3961c9df45 Author: Tom Hacohen <t...@stosb.com> Date: Tue Dec 6 12:45:40 2016 +0000 Static deps unibreak: Update to latest version. This version supports Unicode 9.0 and includes many fixes. Reference git hash: fe1ce2e78c19fa2b4b7a92b1864a12b432da6ec6 This version is not yet released, but now is a better time to sync it, and there are no code changes expected, only "admin" work. Main changes: Unicode 9.0 support Many fixes in the linebreaking algorithm to now pass the Unicode reference test data. @feature --- src/static_libs/libunibreak/AUTHORS | 2 + src/static_libs/libunibreak/ChangeLog | 165 ++++++++++++++++++++++++ src/static_libs/libunibreak/linebreak.c | 192 +++++++++++++++++++--------- src/static_libs/libunibreak/linebreak.h | 10 +- src/static_libs/libunibreak/linebreakdata.c | 176 +++++++++++++++++++------ src/static_libs/libunibreak/linebreakdef.c | 10 +- src/static_libs/libunibreak/linebreakdef.h | 25 ++-- src/static_libs/libunibreak/wordbreak.c | 71 +++++++++- src/static_libs/libunibreak/wordbreakdata.c | 97 ++++++++++++-- src/static_libs/libunibreak/wordbreakdef.h | 5 + 10 files changed, 627 insertions(+), 126 deletions(-) diff --git a/src/static_libs/libunibreak/AUTHORS b/src/static_libs/libunibreak/AUTHORS index 1b4f4b4..34b5c9a 100644 --- a/src/static_libs/libunibreak/AUTHORS +++ b/src/static_libs/libunibreak/AUTHORS @@ -9,3 +9,5 @@ Thomas Klausner. Autoconfiscated and libtoolized liblinebreak. Tom Hacohen. Added word boundaries support. Petr Filipsky. Added incremental processing for line-breaking. + +Andreas Röver. Added grapheme boundaries support. diff --git a/src/static_libs/libunibreak/ChangeLog b/src/static_libs/libunibreak/ChangeLog index f6c4a3d..cad33cf 100644 --- a/src/static_libs/libunibreak/ChangeLog +++ b/src/static_libs/libunibreak/ChangeLog @@ -1,3 +1,168 @@ +2016-12-04 Wu Yongwei <wuyong...@gmail.com> + + Simplify implementation about RI pairing. 
+ * src/linebreak.c (treat_first_char): Get rid of the special + processing in the first character. + (get_lb_result_lookup): Refactor implementation. + +2016-12-03 Wu Yongwei <wuyong...@gmail.com> + + * tools/test.txt: Make a statement more precise. + +2016-12-03 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreak.c (get_lb_result_lookup): Simplify code and fix a + corner case about LB21a. + (treat_first_char): There is no need to treat first character of + Hebrew specially now. + +2016-12-03 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreakdef.h (struct LineBreakContext): Add new field + cLb30aRI. + * src/linebreak.c (lb_init_break_context): Initialize cLb30aRI. + (treat_first_char): Deal with leading RI. + (get_lb_result_lookup): Count RI characters and allow breaking + between each pair occurrence. + +2016-12-03 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreak.c (baTable): Fix a few missing entries. + +2016-12-03 Wu Yongwei <wuyong...@gmail.com> + + Fix test failure regarding Object Replacement Character (U+FFFC). + * src/linebreakdef.h (enum LineBreakClass): Move LBP_CB so that it + can be included in the pair table. + * src/linebreak.c (baTable): Add break action about LBP_CB. + (treat_first_char): Remove customization about LBP_CB. + (get_lb_result_simple): Ditto. + (get_lb_result_lookup): Change assertion about the maximum valid + baTable index. + +2016-11-29 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreak.c (ends_with): New static function. + (ENDS_WITH): New macro. + (resolve_lb_class): Use ENDS_WITH to make the code cleaner. + +2016-11-28 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreak.c (resolve_lb_class): Resolve LBP_CJ to LBP_NS if + lang ends with "-strict". + * src/tests.c: Use "-strict" in line breaking test. + +2016-11-26 Wu Yongwei <wuyong...@gmail.com> + + * .clang-format: `Modernize' the clang-format configuration with + Clang 3.8. 
+ +2016-11-26 Wu Yongwei <wuyong...@gmail.com> + + * src/linebreak.c (get_lb_result_lookup): Fix an issue that + combining marks are not correctly dealt with. + +2016-11-23 Tom Hacohen <t...@stosb.com> + + * src/wordbreak.c (set_wordbreaks): Fix to pass the test suite. + +2016-11-22 Andreas Röver <roe...@users.sf.net> + + Add grapheme breaking support. + * AUTHORS: Add `Andreas Röver'. + * src/Makefile.am (include_HEADERS): Add header files for grapheme + breaking. + (libunibreak_la_SOURCES): Add source files for grapheme breaking. + (distclean-local): Clean also `GraphemeBreakData.txt'. + (GraphemeBreakProperty.txt): New target. + (graphemebreakdata): New target. + * src/graphemebreak.c: New file. + * src/graphemebreak.h: New file. + * src/graphemebreakdef.h: New file. + * src/graphemebreakdata.c: New file. + * src/graphemebreakdata1.tmpl: New file. + * src/graphemebreakdata2.tmpl: New file. + * tools/graphemebreak_test.c: New file. + +2016-11-22 Wu Yongwei <wuyong...@gmail.com> + + * src/tests.c: Adjust code style. + +2016-11-22 Wu Yongwei <wuyong...@gmail.com> + + * .clang-format: New file. + +2016-11-22 Tom Hacohen <t...@stosb.com> + + * src/tests.c: Add a test suite (make check). + * Makefile.am: Ditto. + * src/Makefile.am: Ditto. + +2016-11-17 Tom Hacohen <t...@stosb.com> + + * src/wordbreak.c: Update to Unicode 9.0.0. + * src/wordbreakdata.c: Ditto. + * src/wordbreakdef.h: Ditto. + +2016-11-16 Tom Hacohen <t...@stosb.com> + + * src/wordbreak.c (set_wordbreaks): Fix handling of regional + indicators with utf-8/16. + +2016-11-03 Mikhail Polubisok <m_polubi...@wargaming.net> + + * src/linebreak.c (get_lb_result_lookup): Fix assertion test of max + available indices. + +2016-09-10 Wu Yongwei <wuyong...@gmail.com> + + Update to Unicode 9.0.0. + * src/linebreak.c (baTable): Update according to Unicode 9.0.0. + * src/linebreakdef.h (enum LineBreakClass): Ditto. + * src/linebreakdata.c: Regenerate from LineBreak-9.0.0.txt. + * src/linebreak.h: Update comments. 
+ * src/linebreakdef.c: Ditto. + +2016-08-24 Tom Hacohen <t...@stosb.com> + + Make many structures const. + + These structures should never be changed at runtime so they should + be marked as constant. This means the compiler can now warn us if we + make the mistake of trying to change any of them, but more + importantly, it gives the compiler more information about the nature + of these and therefore lets the linker map these structures to + read-only memory instead of read-write, which should improve page + deduplication in many cases and reduce overall system memory usage. + + This has reduced the number of dirty memory pages from 10 to 2, + which translates to 32KiB of memory saved per process linking to + libunibreak starting from the second process. + * src/linebreak.c (struct LineBreakPropertiesIndex): Mark member + variable lbp as const pointer. + (get_lb_prop_lang): Mark return value as const pointer. + (get_char_lb_class): Mark second parameter as const pointer. + (get_char_lb_class_lang): Ditto. + * src/linebreakdata.c (lb_prop_default): Mark as const. + * src/linebreakdata2.tmpl (lb_prop_default): Ditto. + * src/linebreakdef.c (lb_prop_English): Ditto. + (lb_prop_German): Ditto. + (lb_prop_Spanish): Ditto. + (lb_prop_French): Ditto. + (lb_prop_Russian): Ditto. + (lb_prop_Chinese): Ditto. + (lb_prop_lang_map): Ditto. + * src/linebreakdef.h (struct LineBreakPropertiesLang): Mark member + variable lbp as const pointer. + (struct LineBreakContext): Mark member variable lbpLang as const + pointer. + (lb_prop_default): Declare as const. + (lb_prop_lang_map): Ditto. + * src/wordbreak.c (get_char_wb_class): Mark second parameter as + const pointer. + * src/wordbreakdata.c (wb_prop_default): Mark as const. + * src/wordbreakdata1.tmpl (wb_prop_default): Ditto. 
+ 2015-12-20 Wu Yongwei <wuyong...@gmail.com> Fix the issue that U+FFFC (Object Replacement Character) does not diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c index e3b1ca1..3d87a04 100644 --- a/src/static_libs/libunibreak/linebreak.c +++ b/src/static_libs/libunibreak/linebreak.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 35, for - * Unicode 8.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-35.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -45,7 +45,7 @@ * Implementation of the line breaking algorithm as described in Unicode * Standard Annex 14. * - * @version 3.0, 2015/05/10 + * @version 3.2, 2016/12/04 * @author Wu Yongwei * @author Petr Filipsky */ @@ -81,183 +81,207 @@ enum BreakAction /** * Break action pair table. This is a direct mapping of Table 2 of - * Unicode Standard Annex 14, Revision 30. + * Unicode Standard Annex 14, Revision 37. 
*/ -static enum BreakAction baTable[LBP_RI][LBP_RI] = { +static enum BreakAction baTable[LBP_CB][LBP_CB] = { { /* OP */ PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, - PRH_BRK }, + PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK }, { /* CL */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* CP */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* QU */ PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, { /* GL */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, { /* NS */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - 
DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* EX */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* SY */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* IS */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* PR */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - DIR_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, { /* PO */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* NU */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 
DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* AL */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* HL */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* ID */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* IN */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* HY */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* BA */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, PRH_BRK, 
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* BB */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, { /* B2 */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* ZW */ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, { /* CM */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* WJ */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, { /* H2 */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, 
PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* H3 */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* JL */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* JV */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* JT */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, - DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* RI */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK }, + IND_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, + { /* EB */ + DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, 
IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK }, + { /* EM */ + DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, + { /* ZWJ */ + DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, + { /* CB */ + DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, }; /** @@ -265,7 +289,7 @@ static enum BreakAction baTable[LBP_RI][LBP_RI] = { */ struct LineBreakPropertiesIndex { - utf32_t end; /**< End coding point */ + utf32_t end; /**< End coding point */ const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ }; @@ -278,6 +302,36 @@ static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] = }; /** + * Checks whether the \a str ends with \a suffix, which has length + * \a suffix_len. 
+ * + * @param str string whose ending is to be checked + * @param suffix string to check + * @param suffixLen length of \a suffix + * @return non-zero if true; zero otherwise + */ +static __inline int ends_with(const char *str, const char *suffix, + unsigned suffixLen) +{ + if (str == NULL) + { + return 0; + } + unsigned len = strlen(str); + if (len >= suffixLen && + memcmp(str + len - suffixLen, suffix, suffixLen) == 0) + { + return 1; + } + else + { + return 0; + } +} + +#define ENDS_WITH(str, suffix) ends_with((str), (suffix), sizeof(suffix) - 1) + +/** * Initializes the second-level index to the line breaking properties. * If it is not called, the performance of #get_char_lb_class_lang (and * thus the main functionality) can be pretty bad, especially for big @@ -425,10 +479,17 @@ static enum LineBreakClass resolve_lb_class( return LBP_AL; } case LBP_CJ: - /* Simplified for `normal' line breaking. See - * <url:http://www.unicode.org/reports/tr14/tr14-30.html#CJ> + /* `Strict' and `normal' line breaking. See + * <url:http://www.unicode.org/reports/tr14/#CJ> * for details. 
*/ - return LBP_ID; + if (ENDS_WITH(lang, "-strict")) + { + return LBP_NS; + } + else + { + return LBP_ID; + } case LBP_SA: case LBP_SG: case LBP_XX: @@ -454,14 +515,9 @@ static void treat_first_char( case LBP_NL: lbpCtx->lbcCur = LBP_BK; /* Rule LB5 */ break; - case LBP_CB: - lbpCtx->lbcCur = LBP_BA; /* Rule LB20 */ - break; case LBP_SP: lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */ break; - case LBP_HL: - lbpCtx->fLb21aHebrew = 1; /* Rule LB21a */ default: break; } @@ -502,9 +558,6 @@ static int get_lb_result_simple( case LBP_CR: lbpCtx->lbcCur = LBP_CR; return LINEBREAK_NOBREAK; /* Rule LB6 */ - case LBP_CB: - lbpCtx->lbcCur = LBP_B2; - return LINEBREAK_ALLOWBREAK; /* Rule LB20 */ default: return LINEBREAK_UNDEFINED; /* Table lookup is needed */ } @@ -529,19 +582,26 @@ static int get_lb_result_lookup( { int brk = LINEBREAK_UNDEFINED; - assert(lbpCtx->lbcCur <= LBP_RI); - assert(lbpCtx->lbcNew <= LBP_RI); + assert(lbpCtx->lbcCur <= LBP_CB); + assert(lbpCtx->lbcNew <= LBP_CB); switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1]) { case DIR_BRK: brk = LINEBREAK_ALLOWBREAK; break; - case CMI_BRK: case IND_BRK: brk = (lbpCtx->lbcLast == LBP_SP) ? 
LINEBREAK_ALLOWBREAK : LINEBREAK_NOBREAK; break; + case CMI_BRK: + brk = LINEBREAK_ALLOWBREAK; + if (lbpCtx->lbcLast != LBP_SP) + { + brk = LINEBREAK_NOBREAK; + return brk; /* Do not update lbcCur */ + } + break; case CMP_BRK: brk = LINEBREAK_NOBREAK; if (lbpCtx->lbcLast != LBP_SP) @@ -559,9 +619,24 @@ static int get_lb_result_lookup( brk = LINEBREAK_NOBREAK; lbpCtx->fLb21aHebrew = 0; } - else if (!(lbpCtx->lbcNew == LBP_HY || lbpCtx->lbcNew == LBP_BA)) + else + { + lbpCtx->fLb21aHebrew = (lbpCtx->lbcCur == LBP_HL); + } + + /* Special processing due to rule LB30a */ + if (lbpCtx->lbcCur == LBP_RI) + { + lbpCtx->cLb30aRI++; + if (lbpCtx->cLb30aRI == 2 && lbpCtx->lbcNew == LBP_RI) + { + brk = LINEBREAK_ALLOWBREAK; + lbpCtx->cLb30aRI = 0; + } + } + else { - lbpCtx->fLb21aHebrew = (lbpCtx->lbcNew == LBP_HL); + lbpCtx->cLb30aRI = 0; } lbpCtx->lbcCur = lbpCtx->lbcNew; @@ -589,6 +664,7 @@ void lb_init_break_context( get_char_lb_class_lang(ch, lbpCtx->lbpLang), lbpCtx->lang); lbpCtx->fLb21aHebrew = 0; + lbpCtx->cLb30aRI = 0; treat_first_char(lbpCtx); } diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h index 7d3b757..7753b3d 100644 --- a/src/static_libs/libunibreak/linebreak.h +++ b/src/static_libs/libunibreak/linebreak.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. 
In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 33, for - * Unicode 7.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-33.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -43,7 +43,7 @@ * * Header file for the line breaking algorithm. * - * @version 3.0, 2015/05/10 + * @version 3.1, 2016/09/10 * @author Wu Yongwei */ diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c index dcf39dc..c571f2d 100644 --- a/src/static_libs/libunibreak/linebreakdata.c +++ b/src/static_libs/libunibreak/linebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# LineBreak-8.0.0.txt -# Date: 2015-02-13, 09:15:00 GMT [KW, LI] +# LineBreak-9.0.0.txt +# Date: 2016-05-26, 01:00:00 GMT [KW, LI] */ #include "linebreakdef.h" @@ -159,7 +159,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0829, 0x082D, LBP_CM }, { 0x0830, 0x0858, LBP_AL }, { 0x0859, 0x085B, LBP_CM }, - { 0x085E, 0x08B4, LBP_AL }, + { 0x085E, 0x08BD, LBP_AL }, + { 0x08D4, 0x08E1, LBP_CM }, + { 0x08E2, 0x08E2, LBP_AL }, { 0x08E3, 0x0903, LBP_CM }, { 0x0904, 0x0939, LBP_AL }, { 0x093A, 0x093C, LBP_CM }, @@ -230,7 +232,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0C58, 0x0C61, LBP_AL }, { 0x0C62, 0x0C63, LBP_CM }, { 0x0C66, 0x0C6F, LBP_NU }, - { 0x0C78, 0x0C7F, LBP_AL }, + { 0x0C78, 0x0C80, LBP_AL }, { 0x0C81, 0x0C83, LBP_CM }, { 0x0C85, 0x0CB9, LBP_AL }, { 0x0CBC, 0x0CBC, LBP_CM }, @@ -243,12 +245,12 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0D01, 0x0D03, LBP_CM }, { 0x0D05, 0x0D3D, LBP_AL }, { 0x0D3E, 0x0D4D, LBP_CM }, - { 0x0D4E, 0x0D4E, LBP_AL }, + { 
0x0D4E, 0x0D56, LBP_AL }, { 0x0D57, 0x0D57, LBP_CM }, - { 0x0D5F, 0x0D61, LBP_AL }, + { 0x0D58, 0x0D61, LBP_AL }, { 0x0D62, 0x0D63, LBP_CM }, { 0x0D66, 0x0D6F, LBP_NU }, - { 0x0D70, 0x0D75, LBP_AL }, + { 0x0D70, 0x0D78, LBP_AL }, { 0x0D79, 0x0D79, LBP_PO }, { 0x0D7A, 0x0D7F, LBP_AL }, { 0x0D82, 0x0D83, LBP_CM }, @@ -364,7 +366,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x180B, 0x180D, LBP_CM }, { 0x180E, 0x180E, LBP_GL }, { 0x1810, 0x1819, LBP_NU }, - { 0x1820, 0x18A8, LBP_AL }, + { 0x1820, 0x1884, LBP_AL }, + { 0x1885, 0x1886, LBP_CM }, + { 0x1887, 0x18A8, LBP_AL }, { 0x18A9, 0x18A9, LBP_CM }, { 0x18AA, 0x191E, LBP_AL }, { 0x1920, 0x193B, LBP_CM }, @@ -407,7 +411,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1C50, 0x1C59, LBP_NU }, { 0x1C5A, 0x1C7D, LBP_AL }, { 0x1C7E, 0x1C7F, LBP_BA }, - { 0x1CC0, 0x1CC7, LBP_AL }, + { 0x1C80, 0x1CC7, LBP_AL }, { 0x1CD0, 0x1CD2, LBP_CM }, { 0x1CD3, 0x1CD3, LBP_AL }, { 0x1CD4, 0x1CE8, LBP_CM }, @@ -636,7 +640,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2616, 0x2617, LBP_AI }, { 0x2618, 0x2618, LBP_ID }, { 0x2619, 0x2619, LBP_AL }, - { 0x261A, 0x261F, LBP_ID }, + { 0x261A, 0x261C, LBP_ID }, + { 0x261D, 0x261D, LBP_EB }, + { 0x261E, 0x261F, LBP_ID }, { 0x2620, 0x2638, LBP_AL }, { 0x2639, 0x263B, LBP_ID }, { 0x263C, 0x263F, LBP_AL }, @@ -681,18 +687,22 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x26EB, 0x26F0, LBP_AI }, { 0x26F1, 0x26F5, LBP_ID }, { 0x26F6, 0x26F6, LBP_AI }, - { 0x26F7, 0x26FA, LBP_ID }, + { 0x26F7, 0x26F8, LBP_ID }, + { 0x26F9, 0x26F9, LBP_EB }, + { 0x26FA, 0x26FA, LBP_ID }, { 0x26FB, 0x26FC, LBP_AI }, { 0x26FD, 0x2704, LBP_ID }, { 0x2705, 0x2707, LBP_AL }, - { 0x2708, 0x270D, LBP_ID }, + { 0x2708, 0x2709, LBP_ID }, + { 0x270A, 0x270D, LBP_EB }, { 0x270E, 0x2756, LBP_AL }, { 0x2757, 0x2757, LBP_AI }, { 0x2758, 0x275A, LBP_AL }, { 0x275B, 0x2760, LBP_QU }, { 0x2761, 0x2761, LBP_AL }, { 0x2762, 0x2763, LBP_EX }, - { 0x2764, 0x2767, LBP_AL }, + { 
0x2764, 0x2764, LBP_ID }, + { 0x2765, 0x2767, LBP_AL }, { 0x2768, 0x2768, LBP_OP }, { 0x2769, 0x2769, LBP_CL }, { 0x276A, 0x276A, LBP_OP }, @@ -798,6 +808,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2E3F, 0x2E3F, LBP_AL }, { 0x2E40, 0x2E41, LBP_BA }, { 0x2E42, 0x2E42, LBP_OP }, + { 0x2E43, 0x2E44, LBP_BA }, { 0x2E80, 0x2FFB, LBP_ID }, { 0x3000, 0x3000, LBP_BA }, { 0x3001, 0x3002, LBP_CL }, @@ -925,7 +936,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0xA876, 0xA877, LBP_EX }, { 0xA880, 0xA881, LBP_CM }, { 0xA882, 0xA8B3, LBP_AL }, - { 0xA8B4, 0xA8C4, LBP_CM }, + { 0xA8B4, 0xA8C5, LBP_CM }, { 0xA8CE, 0xA8CF, LBP_BA }, { 0xA8D0, 0xA8D9, LBP_NU }, { 0xA8E0, 0xA8F1, LBP_CM }, @@ -1863,11 +1874,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0xFF62, 0xFF62, LBP_OP }, { 0xFF63, 0xFF64, LBP_CL }, { 0xFF65, 0xFF65, LBP_NS }, - { 0xFF66, 0xFF66, LBP_AL }, + { 0xFF66, 0xFF66, LBP_ID }, { 0xFF67, 0xFF70, LBP_CJ }, - { 0xFF71, 0xFF9D, LBP_AL }, + { 0xFF71, 0xFF9D, LBP_ID }, { 0xFF9E, 0xFF9F, LBP_NS }, - { 0xFFA0, 0xFFDC, LBP_AL }, + { 0xFFA0, 0xFFDC, LBP_ID }, { 0xFFE0, 0xFFE0, LBP_PO }, { 0xFFE1, 0xFFE1, LBP_PR }, { 0xFFE2, 0xFFE4, LBP_ID }, @@ -1890,7 +1901,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x103D0, 0x103D0, LBP_BA }, { 0x103D1, 0x1049D, LBP_AL }, { 0x104A0, 0x104A9, LBP_NU }, - { 0x10500, 0x10855, LBP_AL }, + { 0x104B0, 0x10855, LBP_AL }, { 0x10857, 0x10857, LBP_BA }, { 0x10858, 0x1091B, LBP_AL }, { 0x1091F, 0x1091F, LBP_BA }, @@ -1951,7 +1962,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11238, 0x11239, LBP_BA }, { 0x1123A, 0x1123A, LBP_AL }, { 0x1123B, 0x1123C, LBP_BA }, - { 0x1123D, 0x112A8, LBP_AL }, + { 0x1123D, 0x1123D, LBP_AL }, + { 0x1123E, 0x1123E, LBP_CM }, + { 0x11280, 0x112A8, LBP_AL }, { 0x112A9, 0x112A9, LBP_BA }, { 0x112B0, 0x112DE, LBP_AL }, { 0x112DF, 0x112EA, LBP_CM }, @@ -1965,7 +1978,14 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11357, 
0x11357, LBP_CM }, { 0x1135D, 0x11361, LBP_AL }, { 0x11362, 0x11374, LBP_CM }, - { 0x11480, 0x114AF, LBP_AL }, + { 0x11400, 0x11434, LBP_AL }, + { 0x11435, 0x11446, LBP_CM }, + { 0x11447, 0x1144A, LBP_AL }, + { 0x1144B, 0x1144E, LBP_BA }, + { 0x1144F, 0x1144F, LBP_AL }, + { 0x11450, 0x11459, LBP_NU }, + { 0x1145B, 0x1145B, LBP_BA }, + { 0x1145D, 0x114AF, LBP_AL }, { 0x114B0, 0x114C3, LBP_CM }, { 0x114C4, 0x114C7, LBP_AL }, { 0x114D0, 0x114D9, LBP_NU }, @@ -1983,6 +2003,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11641, 0x11642, LBP_BA }, { 0x11643, 0x11644, LBP_AL }, { 0x11650, 0x11659, LBP_NU }, + { 0x11660, 0x1166C, LBP_BB }, { 0x11680, 0x116AA, LBP_AL }, { 0x116AB, 0x116B7, LBP_CM }, { 0x116C0, 0x116C9, LBP_NU }, @@ -1993,7 +2014,17 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1173F, 0x1173F, LBP_SA }, { 0x118A0, 0x118DF, LBP_AL }, { 0x118E0, 0x118E9, LBP_NU }, - { 0x118EA, 0x1246E, LBP_AL }, + { 0x118EA, 0x11C2E, LBP_AL }, + { 0x11C2F, 0x11C3F, LBP_CM }, + { 0x11C40, 0x11C40, LBP_AL }, + { 0x11C41, 0x11C45, LBP_BA }, + { 0x11C50, 0x11C59, LBP_NU }, + { 0x11C5A, 0x11C6C, LBP_AL }, + { 0x11C70, 0x11C70, LBP_BB }, + { 0x11C71, 0x11C71, LBP_EX }, + { 0x11C72, 0x11C8F, LBP_AL }, + { 0x11C92, 0x11CB6, LBP_CM }, + { 0x12000, 0x1246E, LBP_AL }, { 0x12470, 0x12474, LBP_BA }, { 0x12480, 0x13257, LBP_AL }, { 0x13258, 0x1325A, LBP_OP }, @@ -2027,7 +2058,8 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x16B5B, 0x16F50, LBP_AL }, { 0x16F51, 0x16F92, LBP_CM }, { 0x16F93, 0x16F9F, LBP_AL }, - { 0x1B000, 0x1B001, LBP_ID }, + { 0x16FE0, 0x16FE0, LBP_NS }, + { 0x17000, 0x1B001, LBP_ID }, { 0x1BC00, 0x1BC9C, LBP_AL }, { 0x1BC9D, 0x1BC9E, LBP_CM }, { 0x1BC9F, 0x1BC9F, LBP_BA }, @@ -2055,32 +2087,65 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1DA85, 0x1DA86, LBP_AL }, { 0x1DA87, 0x1DA8A, LBP_BA }, { 0x1DA8B, 0x1DA8B, LBP_AL }, - { 0x1DA9B, 0x1DAAF, LBP_CM }, + { 0x1DA9B, 0x1E02A, LBP_CM }, { 0x1E800, 0x1E8CF, 
LBP_AL }, { 0x1E8D0, 0x1E8D6, LBP_CM }, + { 0x1E900, 0x1E943, LBP_AL }, + { 0x1E944, 0x1E94A, LBP_CM }, + { 0x1E950, 0x1E959, LBP_NU }, + { 0x1E95E, 0x1E95F, LBP_OP }, { 0x1EE00, 0x1EEF1, LBP_AL }, - { 0x1F000, 0x1F0F5, LBP_ID }, - { 0x1F100, 0x1F12D, LBP_AI }, + { 0x1F000, 0x1F0FF, LBP_ID }, + { 0x1F100, 0x1F10C, LBP_AI }, + { 0x1F10D, 0x1F10F, LBP_ID }, + { 0x1F110, 0x1F12D, LBP_AI }, { 0x1F12E, 0x1F12E, LBP_AL }, + { 0x1F12F, 0x1F12F, LBP_ID }, { 0x1F130, 0x1F169, LBP_AI }, { 0x1F16A, 0x1F16B, LBP_AL }, - { 0x1F170, 0x1F19A, LBP_AI }, + { 0x1F16C, 0x1F16F, LBP_ID }, + { 0x1F170, 0x1F1AC, LBP_AI }, + { 0x1F1AD, 0x1F1E5, LBP_ID }, { 0x1F1E6, 0x1F1FF, LBP_RI }, - { 0x1F200, 0x1F39B, LBP_ID }, + { 0x1F200, 0x1F384, LBP_ID }, + { 0x1F385, 0x1F385, LBP_EB }, + { 0x1F386, 0x1F39B, LBP_ID }, { 0x1F39C, 0x1F39D, LBP_AL }, { 0x1F39E, 0x1F3B4, LBP_ID }, { 0x1F3B5, 0x1F3B6, LBP_AL }, { 0x1F3B7, 0x1F3BB, LBP_ID }, { 0x1F3BC, 0x1F3BC, LBP_AL }, - { 0x1F3BD, 0x1F3FA, LBP_ID }, - { 0x1F3FB, 0x1F3FF, LBP_AL }, - { 0x1F400, 0x1F49F, LBP_ID }, + { 0x1F3BD, 0x1F3C2, LBP_ID }, + { 0x1F3C3, 0x1F3C4, LBP_EB }, + { 0x1F3C5, 0x1F3C9, LBP_ID }, + { 0x1F3CA, 0x1F3CB, LBP_EB }, + { 0x1F3CC, 0x1F3FA, LBP_ID }, + { 0x1F3FB, 0x1F3FF, LBP_EM }, + { 0x1F400, 0x1F441, LBP_ID }, + { 0x1F442, 0x1F443, LBP_EB }, + { 0x1F444, 0x1F445, LBP_ID }, + { 0x1F446, 0x1F450, LBP_EB }, + { 0x1F451, 0x1F465, LBP_ID }, + { 0x1F466, 0x1F469, LBP_EB }, + { 0x1F46A, 0x1F46D, LBP_ID }, + { 0x1F46E, 0x1F46E, LBP_EB }, + { 0x1F46F, 0x1F46F, LBP_ID }, + { 0x1F470, 0x1F478, LBP_EB }, + { 0x1F479, 0x1F47B, LBP_ID }, + { 0x1F47C, 0x1F47C, LBP_EB }, + { 0x1F47D, 0x1F480, LBP_ID }, + { 0x1F481, 0x1F483, LBP_EB }, + { 0x1F484, 0x1F484, LBP_ID }, + { 0x1F485, 0x1F487, LBP_EB }, + { 0x1F488, 0x1F49F, LBP_ID }, { 0x1F4A0, 0x1F4A0, LBP_AL }, { 0x1F4A1, 0x1F4A1, LBP_ID }, { 0x1F4A2, 0x1F4A2, LBP_AL }, { 0x1F4A3, 0x1F4A3, LBP_ID }, { 0x1F4A4, 0x1F4A4, LBP_AL }, - { 0x1F4A5, 0x1F4AE, LBP_ID }, + { 0x1F4A5, 0x1F4A9, LBP_ID }, + { 
0x1F4AA, 0x1F4AA, LBP_EB }, + { 0x1F4AB, 0x1F4AE, LBP_ID }, { 0x1F4AF, 0x1F4AF, LBP_AL }, { 0x1F4B0, 0x1F4B0, LBP_ID }, { 0x1F4B1, 0x1F4B2, LBP_AL }, @@ -2090,18 +2155,57 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F517, 0x1F524, LBP_AL }, { 0x1F525, 0x1F531, LBP_ID }, { 0x1F532, 0x1F549, LBP_AL }, - { 0x1F54A, 0x1F5D3, LBP_ID }, + { 0x1F54A, 0x1F574, LBP_ID }, + { 0x1F575, 0x1F575, LBP_EB }, + { 0x1F576, 0x1F579, LBP_ID }, + { 0x1F57A, 0x1F57A, LBP_EB }, + { 0x1F57B, 0x1F58F, LBP_ID }, + { 0x1F590, 0x1F590, LBP_EB }, + { 0x1F591, 0x1F594, LBP_ID }, + { 0x1F595, 0x1F596, LBP_EB }, + { 0x1F597, 0x1F5D3, LBP_ID }, { 0x1F5D4, 0x1F5DB, LBP_AL }, { 0x1F5DC, 0x1F5F3, LBP_ID }, { 0x1F5F4, 0x1F5F9, LBP_AL }, - { 0x1F5FA, 0x1F64F, LBP_ID }, + { 0x1F5FA, 0x1F644, LBP_ID }, + { 0x1F645, 0x1F647, LBP_EB }, + { 0x1F648, 0x1F64A, LBP_ID }, + { 0x1F64B, 0x1F64F, LBP_EB }, { 0x1F650, 0x1F675, LBP_AL }, { 0x1F676, 0x1F678, LBP_QU }, { 0x1F679, 0x1F67B, LBP_NS }, { 0x1F67C, 0x1F67F, LBP_AL }, - { 0x1F680, 0x1F6F3, LBP_ID }, - { 0x1F700, 0x1F8AD, LBP_AL }, - { 0x1F910, 0x3FFFD, LBP_ID }, + { 0x1F680, 0x1F6A2, LBP_ID }, + { 0x1F6A3, 0x1F6A3, LBP_EB }, + { 0x1F6A4, 0x1F6B3, LBP_ID }, + { 0x1F6B4, 0x1F6B6, LBP_EB }, + { 0x1F6B7, 0x1F6BF, LBP_ID }, + { 0x1F6C0, 0x1F6C0, LBP_EB }, + { 0x1F6C1, 0x1F6FF, LBP_ID }, + { 0x1F700, 0x1F773, LBP_AL }, + { 0x1F774, 0x1F77F, LBP_ID }, + { 0x1F780, 0x1F7D4, LBP_AL }, + { 0x1F7D5, 0x1F7FF, LBP_ID }, + { 0x1F800, 0x1F80B, LBP_AL }, + { 0x1F80C, 0x1F80F, LBP_ID }, + { 0x1F810, 0x1F847, LBP_AL }, + { 0x1F848, 0x1F84F, LBP_ID }, + { 0x1F850, 0x1F859, LBP_AL }, + { 0x1F85A, 0x1F85F, LBP_ID }, + { 0x1F860, 0x1F887, LBP_AL }, + { 0x1F888, 0x1F88F, LBP_ID }, + { 0x1F890, 0x1F8AD, LBP_AL }, + { 0x1F8AE, 0x1F917, LBP_ID }, + { 0x1F918, 0x1F91E, LBP_EB }, + { 0x1F91F, 0x1F925, LBP_ID }, + { 0x1F926, 0x1F926, LBP_EB }, + { 0x1F927, 0x1F92F, LBP_ID }, + { 0x1F930, 0x1F930, LBP_EB }, + { 0x1F931, 0x1F932, LBP_ID }, + { 0x1F933, 0x1F939, LBP_EB 
}, + { 0x1F93A, 0x1F93B, LBP_ID }, + { 0x1F93C, 0x1F93E, LBP_EB }, + { 0x1F93F, 0x3FFFD, LBP_ID }, { 0xE0001, 0xE01EF, LBP_CM }, { 0xF0000, 0x10FFFD, LBP_XX }, { 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined } diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c index daa7cb5..04c3db3 100644 --- a/src/static_libs/libunibreak/linebreakdef.c +++ b/src/static_libs/libunibreak/linebreakdef.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 33, for - * Unicode 7.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-33.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -43,7 +43,7 @@ * * Definition of language-specific data. * - * @version 3.0, 2015/05/10 + * @version 3.1, 2016/09/10 * @author Wu Yongwei */ diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h index 5c32aef..9432ed3 100644 --- a/src/static_libs/libunibreak/linebreakdef.h +++ b/src/static_libs/libunibreak/linebreakdef.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. 
* - * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com> + * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> * - * This library has been updated according to Revision 33, for - * Unicode 7.0.0: - * <URL:http://www.unicode.org/reports/tr14/tr14-33.html> + * This library has been updated according to Revision 37, for + * Unicode 9.0.0: + * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> * * The Unicode Terms of Use are available at * <URL:http://www.unicode.org/copyright.html> @@ -45,7 +45,7 @@ * Definitions of internal data structures, declarations of global * variables, and function prototypes for the line breaking algorithm. * - * @version 3.0, 2015/05/10 + * @version 3.2, 2016/12/03 * @author Wu Yongwei * @author Petr Filipsky */ @@ -91,11 +91,17 @@ enum LineBreakClass LBP_JV, /**< Hangul V Jamo */ LBP_JT, /**< Hangul T Jamo */ LBP_RI, /**< Regional indicator */ + LBP_EB, /**< Emoji base */ + LBP_EM, /**< Emoji modifier */ + LBP_ZWJ, /**< Zero width joiner */ + + /* The following break class is treated in the pair table, but it is + * not part of Table 2 of UAX #14. 
*/ + LBP_CB, /**< Contingent break */ /* The following break classes are not treated in the pair table */ LBP_AI, /**< Ambiguous (alphabetic or ideograph) */ LBP_BK, /**< Break (mandatory) */ - LBP_CB, /**< Contingent break */ LBP_CJ, /**< Conditional Japanese starter */ LBP_CR, /**< Carriage return */ LBP_LF, /**< Line feed */ @@ -123,9 +129,9 @@ struct LineBreakProperties */ struct LineBreakPropertiesLang { - const char *lang; /**< Language name */ - size_t namelen; /**< Length of name to match */ - const struct LineBreakProperties *lbp; /**< Pointer to associated data */ + const char *lang; /**< Language name */ + size_t namelen; /**< Length of name to match */ + const struct LineBreakProperties *lbp; /**< Pointer to associated data */ }; /** @@ -140,6 +146,7 @@ struct LineBreakContext enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */ enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */ int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ + int cLb30aRI; /**< Count of RI characters (LB30a) */ }; /* Declarations */ diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c index a0d1c43..063badc 100644 --- a/src/static_libs/libunibreak/wordbreak.c +++ b/src/static_libs/libunibreak/wordbreak.c @@ -162,6 +162,8 @@ static void set_wordbreaks( char *brks, get_next_char_t get_next_char) { + /* Counter of how many times we came across RI */ + int riCounter = 0; enum WordBreakClass wbcLast = WBP_Undefined; /* wbcSeqStart is the class that started the current sequence. * WBP_Undefined is a special case that means "sot". 
@@ -218,6 +220,25 @@ static void set_wordbreaks( posLast = posCur; break; + case WBP_E_Base_GAZ: + case WBP_Glue_After_Zwj: + /* WB3c */ + if (wbcLast == WBP_ZWJ) + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + } + /* No rule found, reset */ + else + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + } + wbcSeqStart = wbcCur; + posLast = posCur; + break; + + case WBP_ZWJ: case WBP_Extend: case WBP_Format: /* WB4 - If not the first char/after a newline (WB3a,3b), skip @@ -228,13 +249,18 @@ static void set_wordbreaks( set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK, get_next_char); wbcSeqStart = wbcCur; + posLast = posCur; } else { /* It's surely not the first */ brks[posCur - 1] = WORDBREAK_NOBREAK; - /* "inherit" the previous class. */ - wbcCur = wbcLast; + /* WB3c precedes 4, so no intervening Extend chars allowed. */ + if (wbcSeqStart != WBP_ZWJ) + { + /* "inherit" the previous class. */ + wbcCur = wbcLast; + } } break; @@ -384,13 +410,48 @@ static void set_wordbreaks( posLast = posCur; break; - case WBP_Regional_Indicator: - /* WB13c */ - if (wbcSeqStart == WBP_Regional_Indicator) + case WBP_E_Base: + /* No rule found, reset */ + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; + break; + + case WBP_E_Modifier: + /* WB14 */ + if ((wbcLast == WBP_E_Base) || + (wbcLast == WBP_E_Base_GAZ)) { set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_NOBREAK, get_next_char); } + /* No rule found, reset */ + else + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + } + wbcSeqStart = wbcCur; + posLast = posCur; + break; + + case WBP_Regional_Indicator: + /* WB15,16 */ + if ((wbcSeqStart == WBP_Regional_Indicator) && + ((riCounter % 2) == 1)) + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + riCounter = 0; /* Reset the sequence */ + } + /* No rule 
found, reset */ + else + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + riCounter = 1; + } wbcSeqStart = wbcCur; posLast = posCur; break; diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c index 9049f59..99fcff5 100644 --- a/src/static_libs/libunibreak/wordbreakdata.c +++ b/src/static_libs/libunibreak/wordbreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# WordBreakProperty-8.0.0.txt -# Date: 2015-02-14, 10:26:15 GMT [MD] +# WordBreakProperty-9.0.0.txt +# Date: 2016-06-01, 10:34:38 GMT */ #include "wordbreakdef.h" @@ -121,6 +121,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0840, 0x0858, WBP_ALetter}, {0x0859, 0x085B, WBP_Extend}, {0x08A0, 0x08B4, WBP_ALetter}, + {0x08B6, 0x08BD, WBP_ALetter}, + {0x08D4, 0x08E1, WBP_Extend}, + {0x08E2, 0x08E2, WBP_Format}, {0x08E3, 0x0902, WBP_Extend}, {0x0903, 0x0903, WBP_Extend}, {0x0904, 0x0939, WBP_ALetter}, @@ -264,6 +267,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0C60, 0x0C61, WBP_ALetter}, {0x0C62, 0x0C63, WBP_Extend}, {0x0C66, 0x0C6F, WBP_Numeric}, + {0x0C80, 0x0C80, WBP_ALetter}, {0x0C81, 0x0C81, WBP_Extend}, {0x0C82, 0x0C83, WBP_Extend}, {0x0C85, 0x0C8C, WBP_ALetter}, @@ -298,6 +302,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0D4A, 0x0D4C, WBP_Extend}, {0x0D4D, 0x0D4D, WBP_Extend}, {0x0D4E, 0x0D4E, WBP_ALetter}, + {0x0D54, 0x0D56, WBP_ALetter}, {0x0D57, 0x0D57, WBP_Extend}, {0x0D5F, 0x0D61, WBP_ALetter}, {0x0D62, 0x0D63, WBP_Extend}, @@ -422,7 +427,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1820, 0x1842, WBP_ALetter}, {0x1843, 0x1843, WBP_ALetter}, {0x1844, 0x1877, WBP_ALetter}, - {0x1880, 0x18A8, WBP_ALetter}, + {0x1880, 0x1884, WBP_ALetter}, + {0x1885, 0x1886, WBP_Extend}, + {0x1887, 0x18A8, WBP_ALetter}, {0x18A9, 0x18A9, WBP_Extend}, {0x18AA, 0x18AA, WBP_ALetter}, {0x18B0, 0x18F5, WBP_ALetter}, @@ -501,6 
+508,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1C50, 0x1C59, WBP_Numeric}, {0x1C5A, 0x1C77, WBP_ALetter}, {0x1C78, 0x1C7D, WBP_ALetter}, + {0x1C80, 0x1C88, WBP_ALetter}, {0x1CD0, 0x1CD2, WBP_Extend}, {0x1CD4, 0x1CE0, WBP_Extend}, {0x1CE1, 0x1CE1, WBP_Extend}, @@ -519,7 +527,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1D79, 0x1D9A, WBP_ALetter}, {0x1D9B, 0x1DBF, WBP_ALetter}, {0x1DC0, 0x1DF5, WBP_Extend}, - {0x1DFC, 0x1DFF, WBP_Extend}, + {0x1DFB, 0x1DFF, WBP_Extend}, {0x1E00, 0x1F15, WBP_ALetter}, {0x1F18, 0x1F1D, WBP_ALetter}, {0x1F20, 0x1F45, WBP_ALetter}, @@ -539,7 +547,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1FE0, 0x1FEC, WBP_ALetter}, {0x1FF2, 0x1FF4, WBP_ALetter}, {0x1FF6, 0x1FFC, WBP_ALetter}, - {0x200C, 0x200D, WBP_Extend}, + {0x200C, 0x200C, WBP_Extend}, + {0x200D, 0x200D, WBP_ZWJ}, {0x200E, 0x200F, WBP_Format}, {0x2018, 0x2018, WBP_MidNumLet}, {0x2019, 0x2019, WBP_MidNumLet}, @@ -548,6 +557,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x2028, 0x2028, WBP_Newline}, {0x2029, 0x2029, WBP_Newline}, {0x202A, 0x202E, WBP_Format}, + {0x202F, 0x202F, WBP_ExtendNumLet}, {0x203F, 0x2040, WBP_ExtendNumLet}, {0x2044, 0x2044, WBP_MidNum}, {0x2054, 0x2054, WBP_ExtendNumLet}, @@ -580,6 +590,10 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x2183, 0x2184, WBP_ALetter}, {0x2185, 0x2188, WBP_ALetter}, {0x24B6, 0x24E9, WBP_ALetter}, + {0x261D, 0x261D, WBP_E_Base}, + {0x26F9, 0x26F9, WBP_E_Base}, + {0x270A, 0x270D, WBP_E_Base}, + {0x2764, 0x2764, WBP_Glue_After_Zwj}, {0x2C00, 0x2C2E, WBP_ALetter}, {0x2C30, 0x2C5E, WBP_ALetter}, {0x2C60, 0x2C7B, WBP_ALetter}, @@ -652,7 +666,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA788, 0xA788, WBP_ALetter}, {0xA78B, 0xA78E, WBP_ALetter}, {0xA78F, 0xA78F, WBP_ALetter}, - {0xA790, 0xA7AD, WBP_ALetter}, + {0xA790, 0xA7AE, WBP_ALetter}, {0xA7B0, 0xA7B7, WBP_ALetter}, {0xA7F7, 0xA7F7, 
WBP_ALetter}, {0xA7F8, 0xA7F9, WBP_ALetter}, @@ -671,7 +685,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA880, 0xA881, WBP_Extend}, {0xA882, 0xA8B3, WBP_ALetter}, {0xA8B4, 0xA8C3, WBP_Extend}, - {0xA8C4, 0xA8C4, WBP_Extend}, + {0xA8C4, 0xA8C5, WBP_Extend}, {0xA8D0, 0xA8D9, WBP_Numeric}, {0xA8E0, 0xA8F1, WBP_Extend}, {0xA8F2, 0xA8F7, WBP_ALetter}, @@ -820,6 +834,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10400, 0x1044F, WBP_ALetter}, {0x10450, 0x1049D, WBP_ALetter}, {0x104A0, 0x104A9, WBP_Numeric}, + {0x104B0, 0x104D3, WBP_ALetter}, + {0x104D8, 0x104FB, WBP_ALetter}, {0x10500, 0x10527, WBP_ALetter}, {0x10530, 0x10563, WBP_ALetter}, {0x10600, 0x10736, WBP_ALetter}, @@ -904,6 +920,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11234, 0x11234, WBP_Extend}, {0x11235, 0x11235, WBP_Extend}, {0x11236, 0x11237, WBP_Extend}, + {0x1123E, 0x1123E, WBP_Extend}, {0x11280, 0x11286, WBP_ALetter}, {0x11288, 0x11288, WBP_ALetter}, {0x1128A, 0x1128D, WBP_ALetter}, @@ -935,6 +952,15 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11362, 0x11363, WBP_Extend}, {0x11366, 0x1136C, WBP_Extend}, {0x11370, 0x11374, WBP_Extend}, + {0x11400, 0x11434, WBP_ALetter}, + {0x11435, 0x11437, WBP_Extend}, + {0x11438, 0x1143F, WBP_Extend}, + {0x11440, 0x11441, WBP_Extend}, + {0x11442, 0x11444, WBP_Extend}, + {0x11445, 0x11445, WBP_Extend}, + {0x11446, 0x11446, WBP_Extend}, + {0x11447, 0x1144A, WBP_ALetter}, + {0x11450, 0x11459, WBP_Numeric}, {0x11480, 0x114AF, WBP_ALetter}, {0x114B0, 0x114B2, WBP_Extend}, {0x114B3, 0x114B8, WBP_Extend}, @@ -984,6 +1010,23 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x118E0, 0x118E9, WBP_Numeric}, {0x118FF, 0x118FF, WBP_ALetter}, {0x11AC0, 0x11AF8, WBP_ALetter}, + {0x11C00, 0x11C08, WBP_ALetter}, + {0x11C0A, 0x11C2E, WBP_ALetter}, + {0x11C2F, 0x11C2F, WBP_Extend}, + {0x11C30, 0x11C36, WBP_Extend}, + {0x11C38, 0x11C3D, WBP_Extend}, + {0x11C3E, 0x11C3E, 
WBP_Extend}, + {0x11C3F, 0x11C3F, WBP_Extend}, + {0x11C40, 0x11C40, WBP_ALetter}, + {0x11C50, 0x11C59, WBP_Numeric}, + {0x11C72, 0x11C8F, WBP_ALetter}, + {0x11C92, 0x11CA7, WBP_Extend}, + {0x11CA9, 0x11CA9, WBP_Extend}, + {0x11CAA, 0x11CB0, WBP_Extend}, + {0x11CB1, 0x11CB1, WBP_Extend}, + {0x11CB2, 0x11CB3, WBP_Extend}, + {0x11CB4, 0x11CB4, WBP_Extend}, + {0x11CB5, 0x11CB6, WBP_Extend}, {0x12000, 0x12399, WBP_ALetter}, {0x12400, 0x1246E, WBP_ALetter}, {0x12480, 0x12543, WBP_ALetter}, @@ -1005,6 +1048,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x16F51, 0x16F7E, WBP_Extend}, {0x16F8F, 0x16F92, WBP_Extend}, {0x16F93, 0x16F9F, WBP_ALetter}, + {0x16FE0, 0x16FE0, WBP_ALetter}, {0x1B000, 0x1B000, WBP_Katakana}, {0x1BC00, 0x1BC6A, WBP_ALetter}, {0x1BC70, 0x1BC7C, WBP_ALetter}, @@ -1057,8 +1101,16 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1DA84, 0x1DA84, WBP_Extend}, {0x1DA9B, 0x1DA9F, WBP_Extend}, {0x1DAA1, 0x1DAAF, WBP_Extend}, + {0x1E000, 0x1E006, WBP_Extend}, + {0x1E008, 0x1E018, WBP_Extend}, + {0x1E01B, 0x1E021, WBP_Extend}, + {0x1E023, 0x1E024, WBP_Extend}, + {0x1E026, 0x1E02A, WBP_Extend}, {0x1E800, 0x1E8C4, WBP_ALetter}, {0x1E8D0, 0x1E8D6, WBP_Extend}, + {0x1E900, 0x1E943, WBP_ALetter}, + {0x1E944, 0x1E94A, WBP_Extend}, + {0x1E950, 0x1E959, WBP_Numeric}, {0x1EE00, 0x1EE03, WBP_ALetter}, {0x1EE05, 0x1EE1F, WBP_ALetter}, {0x1EE21, 0x1EE22, WBP_ALetter}, @@ -1096,8 +1148,37 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1F150, 0x1F169, WBP_ALetter}, {0x1F170, 0x1F189, WBP_ALetter}, {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator}, + {0x1F385, 0x1F385, WBP_E_Base}, + {0x1F3C3, 0x1F3C4, WBP_E_Base}, + {0x1F3CA, 0x1F3CB, WBP_E_Base}, + {0x1F3FB, 0x1F3FF, WBP_E_Modifier}, + {0x1F442, 0x1F443, WBP_E_Base}, + {0x1F446, 0x1F450, WBP_E_Base}, + {0x1F466, 0x1F469, WBP_E_Base_GAZ}, + {0x1F46E, 0x1F46E, WBP_E_Base}, + {0x1F470, 0x1F478, WBP_E_Base}, + {0x1F47C, 0x1F47C, WBP_E_Base}, + {0x1F481, 0x1F483, 
WBP_E_Base}, + {0x1F485, 0x1F487, WBP_E_Base}, + {0x1F48B, 0x1F48B, WBP_Glue_After_Zwj}, + {0x1F4AA, 0x1F4AA, WBP_E_Base}, + {0x1F575, 0x1F575, WBP_E_Base}, + {0x1F57A, 0x1F57A, WBP_E_Base}, + {0x1F590, 0x1F590, WBP_E_Base}, + {0x1F595, 0x1F596, WBP_E_Base}, + {0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj}, + {0x1F645, 0x1F647, WBP_E_Base}, + {0x1F64B, 0x1F64F, WBP_E_Base}, + {0x1F6A3, 0x1F6A3, WBP_E_Base}, + {0x1F6B4, 0x1F6B6, WBP_E_Base}, + {0x1F6C0, 0x1F6C0, WBP_E_Base}, + {0x1F918, 0x1F91E, WBP_E_Base}, + {0x1F926, 0x1F926, WBP_E_Base}, + {0x1F930, 0x1F930, WBP_E_Base}, + {0x1F933, 0x1F939, WBP_E_Base}, + {0x1F93C, 0x1F93E, WBP_E_Base}, {0xE0001, 0xE0001, WBP_Format}, - {0xE0020, 0xE007F, WBP_Format}, + {0xE0020, 0xE007F, WBP_Extend}, {0xE0100, 0xE01EF, WBP_Extend}, {0xFFFFFFFF, 0xFFFFFFFF, WBP_Undefined} }; diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h index ab3cea1..b00aff3 100644 --- a/src/static_libs/libunibreak/wordbreakdef.h +++ b/src/static_libs/libunibreak/wordbreakdef.h @@ -61,6 +61,7 @@ enum WordBreakClass WBP_LF, WBP_Newline, WBP_Extend, + WBP_ZWJ, WBP_Regional_Indicator, WBP_Format, WBP_Katakana, @@ -73,6 +74,10 @@ enum WordBreakClass WBP_MidNum, WBP_Numeric, WBP_ExtendNumLet, + WBP_E_Base, + WBP_E_Modifier, + WBP_Glue_After_Zwj, + WBP_E_Base_GAZ, WBP_Any }; --