src/Makefile.am | 3 src/hb-ot-shape-complex-arabic.cc | 2 src/hb-ot-shape-complex-default.cc | 184 ----------------------------- src/hb-ot-shape-complex-hangul.cc | 232 +++++++++++++++++++++++++++++++++++++ src/hb-ot-shape-complex-hebrew.cc | 172 +++++++++++++++++++++++++++ src/hb-ot-shape-complex-indic.cc | 8 - src/hb-ot-shape-complex-myanmar.cc | 9 - src/hb-ot-shape-complex-private.hh | 38 +++--- src/hb-ot-shape-complex-sea.cc | 9 - src/hb-ot-shape-complex-thai.cc | 4 src/hb-ot-shape-complex-tibetan.cc | 61 +++++++++ src/hb-ot-shape-normalize.cc | 4 src/hb-private.hh | 6 13 files changed, 504 insertions(+), 228 deletions(-)
New commits: commit f14bb7de631b20e2868fb62e5311cd0d9e24bb49 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Dec 31 16:49:15 2013 +0800 [ot] Separate out hebrew and tibetan shapers from default Now default shaper is truly no-op. diff --git a/src/Makefile.am b/src/Makefile.am index 62544db..c650b11 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -93,6 +93,7 @@ HBSOURCES += \ hb-ot-shape-complex-arabic-table.hh \ hb-ot-shape-complex-default.cc \ hb-ot-shape-complex-hangul.cc \ + hb-ot-shape-complex-hebrew.cc \ hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic-machine.hh \ hb-ot-shape-complex-indic-private.hh \ @@ -102,6 +103,7 @@ HBSOURCES += \ hb-ot-shape-complex-sea.cc \ hb-ot-shape-complex-sea-machine.hh \ hb-ot-shape-complex-thai.cc \ + hb-ot-shape-complex-tibetan.cc \ hb-ot-shape-complex-private.hh \ hb-ot-shape-normalize-private.hh \ hb-ot-shape-normalize.cc \ diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc index aca01bd..f7f097e 100644 --- a/src/hb-ot-shape-complex-default.cc +++ b/src/hb-ot-shape-complex-default.cc @@ -27,174 +27,17 @@ #include "hb-ot-shape-complex-private.hh" -/* TODO Add kana, and other small shapers here */ - - -/* The default shaper *only* adds additional per-script features.*/ - -static const hb_tag_t tibetan_features[] = -{ - HB_TAG('a','b','v','s'), - HB_TAG('b','l','w','s'), - HB_TAG('a','b','v','m'), - HB_TAG('b','l','w','m'), - HB_TAG_NONE -}; - -static void -collect_features_default (hb_ot_shape_planner_t *plan) -{ - const hb_tag_t *script_features = NULL; - - switch ((hb_tag_t) plan->props.script) - { - /* Unicode-2.0 additions */ - case HB_SCRIPT_TIBETAN: - script_features = tibetan_features; - break; - } - - for (; script_features && *script_features; script_features++) - plan->map.add_global_bool_feature (*script_features); -} - -static bool -compose_default (const hb_ot_shape_normalize_context_t *c, - hb_codepoint_t a, - hb_codepoint_t b, - hb_codepoint_t *ab) -{ - /* 
Hebrew presentation-form shaping. - * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 - * Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA; - * Note that some letters do not have a dagesh presForm encoded. - */ - static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = { - 0xFB30, /* ALEF */ - 0xFB31, /* BET */ - 0xFB32, /* GIMEL */ - 0xFB33, /* DALET */ - 0xFB34, /* HE */ - 0xFB35, /* VAV */ - 0xFB36, /* ZAYIN */ - 0x0000, /* HET */ - 0xFB38, /* TET */ - 0xFB39, /* YOD */ - 0xFB3A, /* FINAL KAF */ - 0xFB3B, /* KAF */ - 0xFB3C, /* LAMED */ - 0x0000, /* FINAL MEM */ - 0xFB3E, /* MEM */ - 0x0000, /* FINAL NUN */ - 0xFB40, /* NUN */ - 0xFB41, /* SAMEKH */ - 0x0000, /* AYIN */ - 0xFB43, /* FINAL PE */ - 0xFB44, /* PE */ - 0x0000, /* FINAL TSADI */ - 0xFB46, /* TSADI */ - 0xFB47, /* QOF */ - 0xFB48, /* RESH */ - 0xFB49, /* SHIN */ - 0xFB4A /* TAV */ - }; - - bool found = c->unicode->compose (a, b, ab); - - if (!found && (b & ~0x7F) == 0x0580) { - /* Special-case Hebrew presentation forms that are excluded from - * standard normalization, but wanted for old fonts. 
*/ - switch (b) { - case 0x05B4: /* HIRIQ */ - if (a == 0x05D9) { /* YOD */ - *ab = 0xFB1D; - found = true; - } - break; - case 0x05B7: /* patah */ - if (a == 0x05F2) { /* YIDDISH YOD YOD */ - *ab = 0xFB1F; - found = true; - } else if (a == 0x05D0) { /* ALEF */ - *ab = 0xFB2E; - found = true; - } - break; - case 0x05B8: /* QAMATS */ - if (a == 0x05D0) { /* ALEF */ - *ab = 0xFB2F; - found = true; - } - break; - case 0x05B9: /* HOLAM */ - if (a == 0x05D5) { /* VAV */ - *ab = 0xFB4B; - found = true; - } - break; - case 0x05BC: /* DAGESH */ - if (a >= 0x05D0 && a <= 0x05EA) { - *ab = sDageshForms[a - 0x05D0]; - found = (*ab != 0); - } else if (a == 0xFB2A) { /* SHIN WITH SHIN DOT */ - *ab = 0xFB2C; - found = true; - } else if (a == 0xFB2B) { /* SHIN WITH SIN DOT */ - *ab = 0xFB2D; - found = true; - } - break; - case 0x05BF: /* RAFE */ - switch (a) { - case 0x05D1: /* BET */ - *ab = 0xFB4C; - found = true; - break; - case 0x05DB: /* KAF */ - *ab = 0xFB4D; - found = true; - break; - case 0x05E4: /* PE */ - *ab = 0xFB4E; - found = true; - break; - } - break; - case 0x05C1: /* SHIN DOT */ - if (a == 0x05E9) { /* SHIN */ - *ab = 0xFB2A; - found = true; - } else if (a == 0xFB49) { /* SHIN WITH DAGESH */ - *ab = 0xFB2C; - found = true; - } - break; - case 0x05C2: /* SIN DOT */ - if (a == 0x05E9) { /* SHIN */ - *ab = 0xFB2B; - found = true; - } else if (a == 0xFB49) { /* SHIN WITH DAGESH */ - *ab = 0xFB2D; - found = true; - } - break; - } - } - - return found; -} - const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default = { "default", - collect_features_default, + NULL, /* collect_features */ NULL, /* override_features */ NULL, /* data_create */ NULL, /* data_destroy */ NULL, /* preprocess_text */ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, NULL, /* decompose */ - compose_default, + NULL, /* compose */ NULL, /* setup_masks */ HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, true, /* fallback_position */ diff --git a/src/hb-ot-shape-complex-hebrew.cc b/src/hb-ot-shape-complex-hebrew.cc new 
file mode 100644 index 0000000..efef8c1 --- /dev/null +++ b/src/hb-ot-shape-complex-hebrew.cc @@ -0,0 +1,172 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-private.hh" + + +static bool +compose_hebrew (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + /* Hebrew presentation-form shaping. + * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 + * Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA; + * Note that some letters do not have a dagesh presForm encoded. 
+ */ + static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = { + 0xFB30, /* ALEF */ + 0xFB31, /* BET */ + 0xFB32, /* GIMEL */ + 0xFB33, /* DALET */ + 0xFB34, /* HE */ + 0xFB35, /* VAV */ + 0xFB36, /* ZAYIN */ + 0x0000, /* HET */ + 0xFB38, /* TET */ + 0xFB39, /* YOD */ + 0xFB3A, /* FINAL KAF */ + 0xFB3B, /* KAF */ + 0xFB3C, /* LAMED */ + 0x0000, /* FINAL MEM */ + 0xFB3E, /* MEM */ + 0x0000, /* FINAL NUN */ + 0xFB40, /* NUN */ + 0xFB41, /* SAMEKH */ + 0x0000, /* AYIN */ + 0xFB43, /* FINAL PE */ + 0xFB44, /* PE */ + 0x0000, /* FINAL TSADI */ + 0xFB46, /* TSADI */ + 0xFB47, /* QOF */ + 0xFB48, /* RESH */ + 0xFB49, /* SHIN */ + 0xFB4A /* TAV */ + }; + + bool found = c->unicode->compose (a, b, ab); + + if (!found) + { + /* Special-case Hebrew presentation forms that are excluded from + * standard normalization, but wanted for old fonts. */ + switch (b) { + case 0x05B4: /* HIRIQ */ + if (a == 0x05D9) { /* YOD */ + *ab = 0xFB1D; + found = true; + } + break; + case 0x05B7: /* patah */ + if (a == 0x05F2) { /* YIDDISH YOD YOD */ + *ab = 0xFB1F; + found = true; + } else if (a == 0x05D0) { /* ALEF */ + *ab = 0xFB2E; + found = true; + } + break; + case 0x05B8: /* QAMATS */ + if (a == 0x05D0) { /* ALEF */ + *ab = 0xFB2F; + found = true; + } + break; + case 0x05B9: /* HOLAM */ + if (a == 0x05D5) { /* VAV */ + *ab = 0xFB4B; + found = true; + } + break; + case 0x05BC: /* DAGESH */ + if (a >= 0x05D0 && a <= 0x05EA) { + *ab = sDageshForms[a - 0x05D0]; + found = (*ab != 0); + } else if (a == 0xFB2A) { /* SHIN WITH SHIN DOT */ + *ab = 0xFB2C; + found = true; + } else if (a == 0xFB2B) { /* SHIN WITH SIN DOT */ + *ab = 0xFB2D; + found = true; + } + break; + case 0x05BF: /* RAFE */ + switch (a) { + case 0x05D1: /* BET */ + *ab = 0xFB4C; + found = true; + break; + case 0x05DB: /* KAF */ + *ab = 0xFB4D; + found = true; + break; + case 0x05E4: /* PE */ + *ab = 0xFB4E; + found = true; + break; + } + break; + case 0x05C1: /* SHIN DOT */ + if (a == 0x05E9) { /* SHIN */ + *ab = 0xFB2A; 
+ found = true; + } else if (a == 0xFB49) { /* SHIN WITH DAGESH */ + *ab = 0xFB2C; + found = true; + } + break; + case 0x05C2: /* SIN DOT */ + if (a == 0x05E9) { /* SHIN */ + *ab = 0xFB2B; + found = true; + } else if (a == 0xFB49) { /* SHIN WITH DAGESH */ + *ab = 0xFB2D; + found = true; + } + break; + } + } + + return found; +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew = +{ + "hebrew", + NULL, /* collect_features */ + NULL, /* override_features */ + NULL, /* data_create */ + NULL, /* data_destroy */ + NULL, /* preprocess_text */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + NULL, /* decompose */ + compose_hebrew, + NULL, /* setup_masks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, + true, /* fallback_position */ +}; diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh index 44387f5..104726e 100644 --- a/src/hb-ot-shape-complex-private.hh +++ b/src/hb-ot-shape-complex-private.hh @@ -55,10 +55,12 @@ enum hb_ot_shape_zero_width_marks_type_t { HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \ HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \ HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \ + HB_COMPLEX_SHAPER_IMPLEMENT (hebrew) \ HB_COMPLEX_SHAPER_IMPLEMENT (indic) \ HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \ HB_COMPLEX_SHAPER_IMPLEMENT (sea) \ HB_COMPLEX_SHAPER_IMPLEMENT (thai) \ + HB_COMPLEX_SHAPER_IMPLEMENT (tibetan) \ /* ^--- Add new shapers here */ @@ -193,6 +195,18 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) return &_hb_ot_complex_shaper_hangul; + /* Unicode-2.0 additions */ + case HB_SCRIPT_TIBETAN: + + return &_hb_ot_complex_shaper_tibetan; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_HEBREW: + + return &_hb_ot_complex_shaper_hebrew; + + /* ^--- Add new shapers here */ @@ -230,9 +244,6 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) case HB_SCRIPT_LAO: case HB_SCRIPT_THAI: - /* Unicode-2.0 additions */ - case HB_SCRIPT_TIBETAN: - /* Unicode-3.2 additions */ case 
HB_SCRIPT_TAGALOG: case HB_SCRIPT_TAGBANWA: diff --git a/src/hb-ot-shape-complex-tibetan.cc b/src/hb-ot-shape-complex-tibetan.cc new file mode 100644 index 0000000..01465a4 --- /dev/null +++ b/src/hb-ot-shape-complex-tibetan.cc @@ -0,0 +1,61 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-private.hh" + + +static const hb_tag_t tibetan_features[] = +{ + HB_TAG('a','b','v','s'), + HB_TAG('b','l','w','s'), + HB_TAG('a','b','v','m'), + HB_TAG('b','l','w','m'), + HB_TAG_NONE +}; + +static void +collect_features_tibetan (hb_ot_shape_planner_t *plan) +{ + for (const hb_tag_t *script_features = tibetan_features; script_features && *script_features; script_features++) + plan->map.add_global_bool_feature (*script_features); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_tibetan = +{ + "default", + collect_features_tibetan, + NULL, /* override_features */ + NULL, /* data_create */ + NULL, /* data_destroy */ + NULL, /* preprocess_text */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + NULL, /* decompose */ + NULL, /* compose */ + NULL, /* setup_masks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, + true, /* fallback_position */ +}; commit 6300cd72539284ca294ee8286bbbb7f9c72af320 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Dec 31 16:38:47 2013 +0800 [ot] Define HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc index dfb28d8..aca01bd 100644 --- a/src/hb-ot-shape-complex-default.cc +++ b/src/hb-ot-shape-complex-default.cc @@ -196,6 +196,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default = NULL, /* decompose */ compose_default, NULL, /* setup_masks */ - HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, true, /* fallback_position */ }; diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc index 093f465..94cf5e5 100644 --- a/src/hb-ot-shape-complex-hangul.cc +++ b/src/hb-ot-shape-complex-hangul.cc @@ -227,6 +227,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = NULL, /* decompose */ NULL, /* compose */ NULL, /* setup_masks */ - HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, 
false, /* fallback_position */ }; diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh index 320c4e6..44387f5 100644 --- a/src/hb-ot-shape-complex-private.hh +++ b/src/hb-ot-shape-complex-private.hh @@ -44,7 +44,9 @@ enum hb_ot_shape_zero_width_marks_type_t { // HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_EARLY, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, - HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, + + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT = HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE }; diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc index 04ca7ac..8664eca 100644 --- a/src/hb-ot-shape-complex-thai.cc +++ b/src/hb-ot-shape-complex-thai.cc @@ -373,6 +373,6 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai = NULL, /* decompose */ NULL, /* compose */ NULL, /* setup_masks */ - HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, false,/* fallback_position */ }; commit 3d6ca0d32e5c6597acfcf59301cb1905586ddb52 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Dec 31 16:04:35 2013 +0800 [ot] Simplify normalization_preference again No shaper has more than one behavior re this, so no need for a callback. 
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 4f6c86e..f576720 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -366,7 +366,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic = data_create_arabic, data_destroy_arabic, NULL, /* preprocess_text_arabic */ - NULL, /* normalization_preference */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, NULL, /* decompose */ NULL, /* compose */ setup_masks_arabic, diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc index 519790c..dfb28d8 100644 --- a/src/hb-ot-shape-complex-default.cc +++ b/src/hb-ot-shape-complex-default.cc @@ -58,12 +58,6 @@ collect_features_default (hb_ot_shape_planner_t *plan) plan->map.add_global_bool_feature (*script_features); } -static hb_ot_shape_normalization_mode_t -normalization_preference_default (const hb_segment_properties_t *props) -{ - return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; -} - static bool compose_default (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t a, @@ -198,7 +192,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default = NULL, /* data_create */ NULL, /* data_destroy */ NULL, /* preprocess_text */ - normalization_preference_default, + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, NULL, /* decompose */ compose_default, NULL, /* setup_masks */ diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc index 1b89f20..093f465 100644 --- a/src/hb-ot-shape-complex-hangul.cc +++ b/src/hb-ot-shape-complex-hangul.cc @@ -223,7 +223,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = NULL, /* data_create */ NULL, /* data_destroy */ preprocess_text_hangul, - NULL, /* normalization_preference */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, NULL, /* decompose */ NULL, /* compose */ NULL, /* setup_masks */ diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 9edefe3..1e07d33 100644 --- 
a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1690,12 +1690,6 @@ clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, } -static hb_ot_shape_normalization_mode_t -normalization_preference_indic (const hb_segment_properties_t *props HB_UNUSED) -{ - return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; -} - static bool decompose_indic (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t ab, @@ -1806,7 +1800,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = data_create_indic, data_destroy_indic, NULL, /* preprocess_text */ - normalization_preference_indic, + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, decompose_indic, compose_indic, setup_masks_indic, diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc index 25ba726..50209ff 100644 --- a/src/hb-ot-shape-complex-myanmar.cc +++ b/src/hb-ot-shape-complex-myanmar.cc @@ -541,13 +541,6 @@ final_reordering (const hb_ot_shape_plan_t *plan, } -static hb_ot_shape_normalization_mode_t -normalization_preference_myanmar (const hb_segment_properties_t *props HB_UNUSED) -{ - return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; -} - - const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar = { "myanmar", @@ -556,7 +549,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar = NULL, /* data_create */ NULL, /* data_destroy */ NULL, /* preprocess_text */ - normalization_preference_myanmar, + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, NULL, /* decompose */ NULL, /* compose */ setup_masks_myanmar, diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh index 17b95e2..320c4e6 100644 --- a/src/hb-ot-shape-complex-private.hh +++ b/src/hb-ot-shape-complex-private.hh @@ -106,12 +106,7 @@ struct hb_ot_complex_shaper_t hb_font_t *font); - /* normalization_preference() - * Called during shape(). - * May be NULL. 
- */ - hb_ot_shape_normalization_mode_t - (*normalization_preference) (const hb_segment_properties_t *props); + hb_ot_shape_normalization_mode_t normalization_preference; /* decompose() * Called during shape()'s normalization. diff --git a/src/hb-ot-shape-complex-sea.cc b/src/hb-ot-shape-complex-sea.cc index da687ed..6288a90 100644 --- a/src/hb-ot-shape-complex-sea.cc +++ b/src/hb-ot-shape-complex-sea.cc @@ -360,13 +360,6 @@ final_reordering (const hb_ot_shape_plan_t *plan, } -static hb_ot_shape_normalization_mode_t -normalization_preference_sea (const hb_segment_properties_t *props HB_UNUSED) -{ - return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; -} - - const hb_ot_complex_shaper_t _hb_ot_complex_shaper_sea = { "sea", @@ -375,7 +368,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_sea = NULL, /* data_create */ NULL, /* data_destroy */ NULL, /* preprocess_text */ - normalization_preference_sea, + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, NULL, /* decompose */ NULL, /* compose */ setup_masks_sea, diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc index 4594533..04ca7ac 100644 --- a/src/hb-ot-shape-complex-thai.cc +++ b/src/hb-ot-shape-complex-thai.cc @@ -369,7 +369,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai = NULL, /* data_create */ NULL, /* data_destroy */ preprocess_text_thai, - NULL, /* normalization_preference */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, NULL, /* decompose */ NULL, /* compose */ NULL, /* setup_masks */ diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index 6531e1b..9f3cd76 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -289,9 +289,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, hb_font_t *font) { - hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference ? 
- plan->shaper->normalization_preference (&buffer->props) : - HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT; + hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference; const hb_ot_shape_normalize_context_t c = { plan, buffer, commit c98b7183f7dc453d5bac1f2503017cded317a495 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Dec 31 15:55:40 2013 +0800 [ot] Add Hangul shaper Not exhaustively tested, but I think I got the intended logic right. The logic can perhaps be simplified. Maybe we should disable normalization with this shaper. Then again, for now focusing on correctness. diff --git a/src/Makefile.am b/src/Makefile.am index 67a328c..62544db 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -92,6 +92,7 @@ HBSOURCES += \ hb-ot-shape-complex-arabic-fallback.hh \ hb-ot-shape-complex-arabic-table.hh \ hb-ot-shape-complex-default.cc \ + hb-ot-shape-complex-hangul.cc \ hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic-machine.hh \ hb-ot-shape-complex-indic-private.hh \ diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc index d6afa0e..519790c 100644 --- a/src/hb-ot-shape-complex-default.cc +++ b/src/hb-ot-shape-complex-default.cc @@ -32,14 +32,6 @@ /* The default shaper *only* adds additional per-script features.*/ -static const hb_tag_t hangul_features[] = -{ - HB_TAG('l','j','m','o'), - HB_TAG('v','j','m','o'), - HB_TAG('t','j','m','o'), - HB_TAG_NONE -}; - static const hb_tag_t tibetan_features[] = { HB_TAG('a','b','v','s'), @@ -56,11 +48,6 @@ collect_features_default (hb_ot_shape_planner_t *plan) switch ((hb_tag_t) plan->props.script) { - /* Unicode-1.1 additions */ - case HB_SCRIPT_HANGUL: - script_features = hangul_features; - break; - /* Unicode-2.0 additions */ case HB_SCRIPT_TIBETAN: script_features = tibetan_features; diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc new file mode 100644 index 0000000..1b89f20 --- /dev/null +++ b/src/hb-ot-shape-complex-hangul.cc 
@@ -0,0 +1,232 @@ +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-private.hh" + + +/* Hangul shaper */ + + +static const hb_tag_t hangul_features[] = +{ + HB_TAG('l','j','m','o'), + HB_TAG('v','j','m','o'), + HB_TAG('t','j','m','o'), + HB_TAG_NONE +}; + +static void +collect_features_hangul (hb_ot_shape_planner_t *plan) +{ + for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++) + plan->map.add_global_bool_feature (*script_features); +} + +#define LBase 0x1100 +#define VBase 0x1161 +#define TBase 0x11A7 +#define LCount 19 +#define VCount 21 +#define TCount 28 +#define SBase 0xAC00 +#define NCount (VCount * TCount) +#define SCount (LCount * NCount) + +#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1)) +#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1)) +#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1)) +#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1)) + +#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8, 0x11FF, 0xD7C8, 0xD7FF)) + +static void +preprocess_text_hangul (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + /* Hangul syllables come in two shapes: LV, and LVT. Of those: + * + * - LV can be precomposed, or decomposed. Let's call those + * <LV> and <L,V>, + * - LVT can be fully precomposed, partially precomposed, or + * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. + * + * The composition / decomposition is mechanical. However, not + * all <L,V> sequences compose, and not all <LV,T> sequences + * compose. + * + * Here are the specifics: + * + * - <L>: U+1100..115F, U+A960..A97F + * - <V>: U+1160..11A7, U+D7B0..D7C7 + * - <T>: U+11A8..11FF, U+D7C8..D7FF + * + * - Only the <L,V> sequences for the 11xx ranges combine. + * - Only <LV,T> sequences for T in U+11A8..11C2 combine. 
+ * + * Here is what we want to accomplish in this shaper: + * + * - If the whole syllable can be precomposed, do that, + * - Otherwise, fully decompose. + * + * That is, of the different possible syllables: + * + * <L> + * <L,V> + * <L,V,T> + * <LV> + * <LVT> + * <LV, T> + * + * - <L> needs no work. + * + * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we + * should fully decompose them if font supports. + * + * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. + * + * - <LV,T> we should compose if the whole thing can be composed, otherwise we should + * decompose. + */ + + buffer->clear_output (); + unsigned int count = buffer->len; + for (buffer->idx = 0; buffer->idx < count;) + { + hb_codepoint_t u = buffer->cur().codepoint; + + if (isCombiningL(u) && buffer->idx + 1 < count) + { + hb_codepoint_t l = u; + hb_codepoint_t v = buffer->cur(+1).codepoint; + if (isCombiningV(v)) + { + /* Have <L,V> or <L,V,T>. */ + unsigned int len = 2; + unsigned int tindex = 0; + if (buffer->idx + 2 < count) + { + hb_codepoint_t t = buffer->cur(+2).codepoint; + if (isCombiningT(t)) + { + len = 3; + tindex = t - TBase; + } + else if (isT (t)) + { + /* Old T jamo. Doesn't combine. Don't combine *anything*. */ + len = 0; + } + } + + if (len) + { + hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; + hb_codepoint_t glyph; + if (font->get_glyph (s, 0, &glyph)) + { + buffer->replace_glyphs (len, 1, &s); + if (unlikely (buffer->in_error)) + return; + continue; + } + } + } + } + + else if (isCombinedS(u)) + { + /* Have <LV>, <LVT>, or <LV,T> */ + hb_codepoint_t s = u; + hb_codepoint_t glyph; + bool has_glyph = font->get_glyph (s, 0, &glyph); + unsigned int lindex = (s - SBase) / NCount; + unsigned int nindex = (s - SBase) % NCount; + unsigned int vindex = nindex / VCount; + unsigned int tindex = nindex % VCount; + + if (tindex && has_glyph) + goto next; /* <LVT> supported. Nothing to do. 
*/ + + if (!tindex && + buffer->idx + 1 < count && + isCombiningT (buffer->cur(+1).codepoint)) + { + /* <LV,T>, try to combine. */ + tindex = buffer->cur(+1).codepoint - TBase; + hb_codepoint_t new_s = s + tindex; + if (font->get_glyph (new_s, 0, &glyph)) + { + buffer->replace_glyphs (2, 1, &new_s); + if (unlikely (buffer->in_error)) + return; + continue; + } + } + + /* Otherwise, decompose if font doesn't support <LV>, + * or if having non-combining <LV,T>. Note that we + * already handled combining <LV,T> above. */ + if (!has_glyph || + (buffer->idx + 1 < count && + isT (buffer->cur(+1).codepoint))) + { + hb_codepoint_t decomposed[3] = {LBase + lindex, + VBase + vindex, + TBase + tindex}; + if (font->get_glyph (decomposed[0], 0, &glyph) && + font->get_glyph (decomposed[1], 0, &glyph) && + (tindex && font->get_glyph (decomposed[2], 0, &glyph))) + { + buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed); + if (unlikely (buffer->in_error)) + return; + continue; + } + } + } + + next: + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = +{ + "hangul", + collect_features_hangul, + NULL, /* override_features */ + NULL, /* data_create */ + NULL, /* data_destroy */ + preprocess_text_hangul, + NULL, /* normalization_preference */ + NULL, /* decompose */ + NULL, /* compose */ + NULL, /* setup_masks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_UNICODE_LATE, + false, /* fallback_position */ +}; diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh index ac0072b..17b95e2 100644 --- a/src/hb-ot-shape-complex-private.hh +++ b/src/hb-ot-shape-complex-private.hh @@ -52,6 +52,7 @@ enum hb_ot_shape_zero_width_marks_type_t { #define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \ HB_COMPLEX_SHAPER_IMPLEMENT (default) /* should be first */ \ HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \ + HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \ HB_COMPLEX_SHAPER_IMPLEMENT (indic) \ HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \ 
HB_COMPLEX_SHAPER_IMPLEMENT (sea) \ @@ -189,19 +190,10 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) return &_hb_ot_complex_shaper_thai; -#if 0 - /* Note: - * Currently we don't have a separate Hangul shaper. The default shaper handles - * Hangul by enabling jamo features. We may want to implement a separate shaper - * in the future. See this thread for details of what such a shaper would do: - * - * http://lists.freedesktop.org/archives/harfbuzz/2013-April/003070.html - */ /* Unicode-1.1 additions */ case HB_SCRIPT_HANGUL: return &_hb_ot_complex_shaper_hangul; -#endif /* ^--- Add new shapers here */ diff --git a/src/hb-private.hh b/src/hb-private.hh index 4b72260..680b21e 100644 --- a/src/hb-private.hh +++ b/src/hb-private.hh @@ -808,6 +808,12 @@ hb_in_range (T u, T lo, T hi) } template <typename T> static inline bool +hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2) +{ + return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2); +} + +template <typename T> static inline bool hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2, T lo3, T hi3) { return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2) || hb_in_range (u, lo3, hi3);
_______________________________________________ HarfBuzz mailing list HarfBuzz@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/harfbuzz