docs/harfbuzz-sections.txt | 5 src/hb-blob-private.hh | 4 src/hb-dsalgs.hh | 3 src/hb-face.cc | 149 ++++++++++++++ src/hb-face.h | 14 + src/hb-machinery-private.hh | 83 ++++++++ src/hb-ot-cmap-table.hh | 243 +++++++++++++++--------- src/hb-subset-plan.cc | 2 src/hb-subset-plan.hh | 2 src/hb-subset-private.hh | 7 src/hb-subset.cc | 186 +++++------------- src/hb-subset.h | 16 + test/api/test-subset-codepoints.c | 18 - test/fuzzing/hb-subset-get-codepoints-fuzzer.cc | 2 test/shaping/data/in-house/tests/fuzzed.tests | 2 15 files changed, 494 insertions(+), 242 deletions(-)
New commits: commit c8cfb702e96bf4e89495fcc56f965c88bfa77dca Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 16:14:32 2018 -0700 [cmap] Minor diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 31f3b309..dcdff008 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -292,11 +292,13 @@ struct CmapSubtableFormat4 } inline void collect_unicodes (hb_set_t *out) const { - for (unsigned int i = 0; i < this->segCount; i++) + unsigned int count = this->segCount; + if (count && this->startCount[count - 1] == 0xFFFFu) + count--; /* Skip sentinel segment. */ + for (unsigned int i = 0; i < count; i++) { /* XXX This does NOT skip over chars mapping to gid0... */ - if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) - out->add_range (this->startCount[i], this->endCount[i]); + out->add_range (this->startCount[i], this->endCount[i]); } } commit 1dcf5fb038e7c2d3d03a50c4cd9869a922f9adf7 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 16:11:26 2018 -0700 [cmap] Add hb_subset_collect_variation_unicodes() To be moved to hb-face.h later. diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 2f115e2e..31f3b309 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -296,7 +296,7 @@ struct CmapSubtableFormat4 { /* XXX This does NOT skip over chars mapping to gid0... 
*/ if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) - hb_set_add_range (out, this->startCount[i], this->endCount[i]); + out->add_range (this->startCount[i], this->endCount[i]); } } @@ -459,11 +459,9 @@ struct CmapSubtableLongSegmented inline void collect_unicodes (hb_set_t *out) const { for (unsigned int i = 0; i < this->groups.len; i++) { - hb_set_add_range (out, - MIN ((unsigned int) this->groups[i].startCharCode, - (unsigned int) HB_MAX_UNICODE_CODEPOINT_VALUE), - MIN ((unsigned int) this->groups[i].endCharCode, - (unsigned int) HB_MAX_UNICODE_CODEPOINT_VALUE)); + out->add_range (this->groups[i].startCharCode, + MIN ((hb_codepoint_t) this->groups[i].endCharCode, + (hb_codepoint_t) HB_MAX_UNICODE_CODEPOINT_VALUE)); } } @@ -600,7 +598,23 @@ struct UnicodeValueRange DEFINE_SIZE_STATIC (4); }; -typedef SortedArrayOf<UnicodeValueRange, HBUINT32> DefaultUVS; +struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32> +{ + inline void collect_unicodes (hb_set_t *out) const + { + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + { + hb_codepoint_t first = arrayZ[i].startUnicodeValue; + hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount), + (hb_codepoint_t) HB_MAX_UNICODE_CODEPOINT_VALUE); + out->add_range (first, last); + } + } + + public: + DEFINE_SIZE_ARRAY (4, arrayZ); +}; struct UVSMapping { @@ -621,7 +635,18 @@ struct UVSMapping DEFINE_SIZE_STATIC (5); }; -typedef SortedArrayOf<UVSMapping, HBUINT32> NonDefaultUVS; +struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32> +{ + inline void collect_unicodes (hb_set_t *out) const + { + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + out->add (arrayZ[i].glyphID); + } + + public: + DEFINE_SIZE_ARRAY (4, arrayZ); +}; struct VariationSelectorRecord { @@ -644,6 +669,12 @@ struct VariationSelectorRecord return GLYPH_VARIANT_NOT_FOUND; } + inline void collect_unicodes (hb_set_t *out, const void 
*base) const + { + (base+defaultUVS).collect_unicodes (out); + (base+nonDefaultUVS).collect_unicodes (out); + } + inline int cmp (const hb_codepoint_t &variation_selector) const { return varSelector.cmp (variation_selector); @@ -672,7 +703,7 @@ struct CmapSubtableFormat14 hb_codepoint_t variation_selector, hb_codepoint_t *glyph) const { - return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this); + return record[record.bsearch (variation_selector)].get_glyph (codepoint, glyph, this); } inline void collect_variation_selectors (hb_set_t *out) const @@ -681,6 +712,11 @@ struct CmapSubtableFormat14 for (unsigned int i = 0; i < count; i++) out->add (record.arrayZ[i].varSelector); } + inline void collect_variation_unicodes (hb_codepoint_t variation_selector, + hb_set_t *out) const + { + record[record.bsearch (variation_selector)].collect_unicodes (out, this); + } inline bool sanitize (hb_sanitize_context_t *c) const { @@ -1045,6 +1081,11 @@ struct cmap { subtable_uvs->collect_variation_selectors (out); } + inline void collect_variation_unicodes (hb_codepoint_t variation_selector, + hb_set_t *out) const + { + subtable_uvs->collect_variation_unicodes (variation_selector, out); + } protected: typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 01d9b89a..77d9e81c 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -263,7 +263,8 @@ hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out) * Since: REPLACEME */ void -hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out) +hb_subset_collect_variation_selectors (hb_face_t *face, + hb_set_t *out) { /* XXX Use saved accel. */ OT::cmap::accelerator_t cmap; @@ -271,3 +272,25 @@ hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out) cmap.collect_variation_selectors (out); cmap.fini(); } + +/** + * hb_subset_collect_variation_unicodes: + * @face: font face. 
+ * @out: set to add Unicode characters for @variation_selector covered by @face to. + * + * + * + * Since: REPLACEME + */ +void +hb_subset_collect_variation_unicodes (hb_face_t *face, + hb_codepoint_t variation_selector, + hb_set_t *out) +{ + /* XXX Use saved accel. */ + OT::cmap::accelerator_t cmap; + cmap.init (face); + cmap.collect_variation_unicodes (variation_selector, out); + cmap.fini(); +} + diff --git a/src/hb-subset.h b/src/hb-subset.h index 745bacf2..1e7d8f52 100644 --- a/src/hb-subset.h +++ b/src/hb-subset.h @@ -84,10 +84,16 @@ hb_subset (hb_face_t *source, /* TODO Move to hb-face.h. */ HB_EXTERN void -hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out); +hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out); HB_EXTERN void -hb_subset_collect_variation_selectors (hb_face_t *source, hb_set_t *out); +hb_subset_collect_variation_selectors (hb_face_t *face, + hb_set_t *out); + +HB_EXTERN void +hb_subset_collect_variation_unicodes (hb_face_t *face, + hb_codepoint_t variation_selector, + hb_set_t *out); HB_END_DECLS commit 4806b3800d77603c203e8bb2e88baeb2b3a14f05 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 15:56:07 2018 -0700 [cmap] Add hb_subset_collect_variation_selectors() To be moved to hb-face later. diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index bf9874a3..2f115e2e 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -594,7 +594,7 @@ struct UnicodeValueRange } HBUINT24 startUnicodeValue; /* First value in this range. */ - HBUINT8 additionalCount; /* Number of additional values in this + HBUINT8 additionalCount; /* Number of additional values in this * range. 
*/ public: DEFINE_SIZE_STATIC (4); @@ -675,6 +675,13 @@ struct CmapSubtableFormat14 return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this); } + inline void collect_variation_selectors (hb_set_t *out) const + { + unsigned int count = record.len; + for (unsigned int i = 0; i < count; i++) + out->add (record.arrayZ[i].varSelector); + } + inline bool sanitize (hb_sanitize_context_t *c) const { TRACE_SANITIZE (this); @@ -977,7 +984,7 @@ struct cmap /* Meh. */ if (!subtable_uvs) subtable_uvs = &Null(CmapSubtableFormat14); - this->uvs_table = subtable_uvs; + this->subtable_uvs = subtable_uvs; this->get_glyph_data = subtable; if (unlikely (symbol)) @@ -1018,7 +1025,7 @@ struct cmap hb_codepoint_t variation_selector, hb_codepoint_t *glyph) const { - switch (this->uvs_table->get_glyph_variant (unicode, + switch (this->subtable_uvs->get_glyph_variant (unicode, variation_selector, glyph)) { @@ -1034,6 +1041,10 @@ struct cmap { subtable->collect_unicodes (out); } + inline void collect_variation_selectors (hb_set_t *out) const + { + subtable_uvs->collect_variation_selectors (out); + } protected: typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, @@ -1073,12 +1084,13 @@ struct cmap private: const CmapSubtable *subtable; + const CmapSubtableFormat14 *subtable_uvs; + hb_cmap_get_glyph_func_t get_glyph_func; const void *get_glyph_data; CmapSubtableFormat4::accelerator_t format4_accel; - const CmapSubtableFormat14 *uvs_table; hb_blob_t *blob; }; diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 2e991de2..01d9b89a 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -238,14 +238,36 @@ hb_subset (hb_face_t *source, /** * hb_subset_collect_unicodes: - * @source: font face data to load. - * @out: set to add the all codepoints covered by font face, source. + * @face: font face. + * @out: set to add Unicode characters covered by @face to. 
+ * + * Since: REPLACEME */ void -hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out) +hb_subset_collect_unicodes (hb_face_t *face, hb_set_t *out) { + /* XXX Use saved accel. */ OT::cmap::accelerator_t cmap; - cmap.init (source); + cmap.init (face); cmap.collect_unicodes (out); cmap.fini(); } + +/** + * hb_subset_collect_variation_selectors: + * @face: font face. + * @out: set to add Variation Selector characters covered by @face to. + * + * + * + * Since: REPLACEME + */ +void +hb_subset_collect_variation_selectors (hb_face_t *face, hb_set_t *out) +{ + /* XXX Use saved accel. */ + OT::cmap::accelerator_t cmap; + cmap.init (face); + cmap.collect_variation_selectors (out); + cmap.fini(); +} diff --git a/src/hb-subset.h b/src/hb-subset.h index b79b8094..745bacf2 100644 --- a/src/hb-subset.h +++ b/src/hb-subset.h @@ -80,10 +80,16 @@ hb_subset (hb_face_t *source, hb_subset_profile_t *profile, hb_subset_input_t *input); -/* hb_subset_collect_unicodes */ + +/* TODO Move to hb-face.h. 
*/ + HB_EXTERN void hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out); +HB_EXTERN void +hb_subset_collect_variation_selectors (hb_face_t *source, hb_set_t *out); + + HB_END_DECLS #endif /* HB_SUBSET_H */ commit 3336de24790ac1a12852ac2a3e2fff4d6bb19bc7 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 15:41:25 2018 -0700 [cmap] Remove unused code diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index a767354f..bf9874a3 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -1039,8 +1039,6 @@ struct cmap typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph); - typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj, - hb_set_t *out); template <typename Type> static inline bool get_glyph_from (const void *obj, @@ -1052,14 +1050,6 @@ struct cmap } template <typename Type> - static inline void collect_unicodes_from (const void *obj, - hb_set_t *out) - { - const Type *typed_obj = (const Type *) obj; - typed_obj->collect_unicodes (out); - } - - template <typename Type> static inline bool get_glyph_from_symbol (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) commit 1becabe06c0c58aaf69a9ba641508e77a60f3451 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 15:37:56 2018 -0700 [cmap] Use bsearch to find subtables diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 173d62e6..a767354f 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -1101,10 +1101,7 @@ struct cmap key.platformID.set (platform_id); key.encodingID.set (encoding_id); - /* Note: We can use bsearch, but since it has no performance - * implications, we use lsearch and as such accept fonts with - * unsorted subtable list. 
*/ - int result = encodingRecord./*bsearch*/lsearch (key); + int result = encodingRecord.bsearch (key); if (result == -1 || !encodingRecord[result].subtable) return nullptr; diff --git a/test/shaping/data/in-house/tests/fuzzed.tests b/test/shaping/data/in-house/tests/fuzzed.tests index 43a19334..a6ce93d0 100644 --- a/test/shaping/data/in-house/tests/fuzzed.tests +++ b/test/shaping/data/in-house/tests/fuzzed.tests @@ -10,7 +10,7 @@ ../fonts/fab39d60d758cb586db5a504f218442cd1395725.ttf:--font-funcs=ot:U+0041,U+0041:[gid0=0+1000|gid0=1+1000] ../fonts/205edd09bd3d141cc9580f650109556cc28b22cb.ttf:--font-funcs=ot:U+0041:[gid0=0+1000] ../fonts/217a934cfe15c548b572c203dceb2befdf026462.ttf:--font-funcs=ot:U+0061,U+0061,U+0061:[] -../fonts/558661aa659912f4d30ecd27bd09835171a8e2b0.ttf:--font-funcs=ot:U+FFFD,U+E0100,U+FFFD,U+E0010:[] +../fonts/558661aa659912f4d30ecd27bd09835171a8e2b0.ttf:--font-funcs=ot:U+FFFD,U+E0100,U+FFFD,U+E0010:[gid3584=0+1000|gid1024=0+1000|gid1=0+1000|gid8=0+1000|gid3=0+1000|gid0=0+1000|gid1=0+1000|gid3584=0+1000|gid3584=2+1000|gid1024=2+1000|gid1=2+1000|gid8=2+1000|gid3=2+1000|gid0=2+1000|gid1=2+1000|gid3584=2+1000] ../fonts/a34a9191d9376bda419836effeef7e75c1386016.ttf:--font-funcs=ot:U+0041:[] ../fonts/a69118c2c2ada48ff803d9149daa54c9ebdae30e.ttf:--font-funcs=ot:U+0041:[gid0=0+1229] ../fonts/b6acef662e0beb8d5fcf5b61c6b0ca69537b7402.ttf:--font-funcs=ot:U+0041:[gid0=0+1000] commit 02fe03e09a3258e07d2d6749990f6d31dd2a8525 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 15:33:05 2018 -0700 [cmap] Factor out find_best_subtable diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 1152c8ea..173d62e6 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -928,6 +928,35 @@ struct cmap return result; } + const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const + { + if (symbol) *symbol = false; + + const CmapSubtable *subtable; + + /* 32-bit subtables. 
*/ + if ((subtable = this->find_subtable (3, 10))) return subtable; + if ((subtable = this->find_subtable (0, 6))) return subtable; + if ((subtable = this->find_subtable (0, 4))) return subtable; + + /* 16-bit subtables. */ + if ((subtable = this->find_subtable (3, 1))) return subtable; + if ((subtable = this->find_subtable (0, 3))) return subtable; + if ((subtable = this->find_subtable (0, 2))) return subtable; + if ((subtable = this->find_subtable (0, 1))) return subtable; + if ((subtable = this->find_subtable (0, 0))) return subtable; + + /* Symbol subtable. */ + if ((subtable = this->find_subtable (3, 0))) + { + if (symbol) *symbol = true; + return subtable; + } + + /* Meh. */ + return &Null(CmapSubtable); + } + struct accelerator_t { inline void init (hb_face_t *face) @@ -935,27 +964,8 @@ struct cmap this->blob = hb_sanitize_context_t().reference_table<cmap> (face); const cmap *table = this->blob->as<cmap> (); const CmapSubtableFormat14 *subtable_uvs = nullptr; - - subtable = nullptr; - - bool symbol = false; - /* 32-bit subtables. */ - if (!subtable) subtable = table->find_subtable (3, 10); - if (!subtable) subtable = table->find_subtable (0, 6); - if (!subtable) subtable = table->find_subtable (0, 4); - /* 16-bit subtables. */ - if (!subtable) subtable = table->find_subtable (3, 1); - if (!subtable) subtable = table->find_subtable (0, 3); - if (!subtable) subtable = table->find_subtable (0, 2); - if (!subtable) subtable = table->find_subtable (0, 1); - if (!subtable) subtable = table->find_subtable (0, 0); - if (!subtable) - { - subtable = table->find_subtable (3, 0); - if (subtable) symbol = true; - } - /* Meh. */ - if (!subtable) subtable = &Null(CmapSubtable); + bool symbol; + subtable = table->find_best_subtable (&symbol); /* UVS subtable. 
*/ if (!subtable_uvs) commit b41c43b4e112bfa38fef35694842f242c28a7da2 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 15:25:03 2018 -0700 Minor diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index a25d13cf..1152c8ea 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -244,14 +244,12 @@ struct CmapSubtableFormat4 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; } - static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) + inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const { - const accelerator_t *thiz = (const accelerator_t *) obj; - /* Custom two-array bsearch. */ - int min = 0, max = (int) thiz->segCount - 1; - const HBUINT16 *startCount = thiz->startCount; - const HBUINT16 *endCount = thiz->endCount; + int min = 0, max = (int) this->segCount - 1; + const HBUINT16 *startCount = this->startCount; + const HBUINT16 *endCount = this->endCount; unsigned int i; while (min <= max) { @@ -270,24 +268,37 @@ struct CmapSubtableFormat4 found: hb_codepoint_t gid; - unsigned int rangeOffset = thiz->idRangeOffset[i]; + unsigned int rangeOffset = this->idRangeOffset[i]; if (rangeOffset == 0) - gid = codepoint + thiz->idDelta[i]; + gid = codepoint + this->idDelta[i]; else { /* Somebody has been smoking... 
*/ - unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount; - if (unlikely (index >= thiz->glyphIdArrayLength)) + unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; + if (unlikely (index >= this->glyphIdArrayLength)) return false; - gid = thiz->glyphIdArray[index]; + gid = this->glyphIdArray[index]; if (unlikely (!gid)) return false; - gid += thiz->idDelta[i]; + gid += this->idDelta[i]; } *glyph = gid & 0xFFFFu; return *glyph != 0; } + static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) + { + return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); + } + inline void collect_unicodes (hb_set_t *out) const + { + for (unsigned int i = 0; i < this->segCount; i++) + { + /* XXX This does NOT skip over chars mapping to gid0... */ + if (this->startCount[i] != 0xFFFFu || this->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) + hb_set_add_range (out, this->startCount[i], this->endCount[i]); + } + } const HBUINT16 *endCount; const HBUINT16 *startCount; @@ -306,16 +317,9 @@ struct CmapSubtableFormat4 } inline void collect_unicodes (hb_set_t *out) const { - unsigned int segCount = this->segCountX2 / 2; - const HBUINT16 *endCount = this->values; - const HBUINT16 *startCount = endCount + segCount + 1; - - for (unsigned int i = 0; i < segCount; i++) - { - /* XXX This does NOT skip over chars mapping to gid0... 
*/ - if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) - hb_set_add_range (out, startCount[i], endCount[i]); - } + accelerator_t accel; + accel.init (this); + accel.collect_unicodes (out); } inline bool sanitize (hb_sanitize_context_t *c) const commit 7d382fa276f44b7b163e98d434cc79f958bf87fb Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 09:35:45 2018 -0700 [cmap] Implement collect_unicodes() for Format0/6/10 diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 2f88a767..a25d13cf 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -54,6 +54,12 @@ struct CmapSubtableFormat0 *glyph = gid; return *glyph != 0; } + inline void collect_unicodes (hb_set_t *out) const + { + for (unsigned int i = 0; i < 256; i++) + if (glyphIdArray[i]) + out->add (i); + } inline bool sanitize (hb_sanitize_context_t *c) const { @@ -306,6 +312,7 @@ struct CmapSubtableFormat4 for (unsigned int i = 0; i < segCount; i++) { + /* XXX This does NOT skip over chars mapping to gid0... */ if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) hb_set_add_range (out, startCount[i], endCount[i]); } @@ -384,7 +391,7 @@ struct CmapSubtableLongGroup HBUINT32 startCharCode; /* First character code in this group. */ HBUINT32 endCharCode; /* Last character code in this group. */ HBUINT32 glyphID; /* Glyph index; interpretation depends on - * subtable format. */ + * subtable format. 
*/ public: DEFINE_SIZE_STATIC (12); }; @@ -401,6 +408,14 @@ struct CmapSubtableTrimmed *glyph = gid; return *glyph != 0; } + inline void collect_unicodes (hb_set_t *out) const + { + hb_codepoint_t start = startCharCode; + unsigned int count = glyphIdArray.len; + for (unsigned int i = 0; i < count; i++) + if (glyphIdArray[i]) + out->add (start + i); + } inline bool sanitize (hb_sanitize_context_t *c) const { @@ -694,10 +709,10 @@ struct CmapSubtable inline void collect_unicodes (hb_set_t *out) const { switch (u.format) { -// case 0: u.format0 .collect_unicodes (out); return; + case 0: u.format0 .collect_unicodes (out); return; case 4: u.format4 .collect_unicodes (out); return; -// case 6: u.format6 .collect_unicodes (out); return; -// case 10: u.format10.collect_unicodes (out); return; + case 6: u.format6 .collect_unicodes (out); return; + case 10: u.format10.collect_unicodes (out); return; case 12: u.format12.collect_unicodes (out); return; case 13: u.format13.collect_unicodes (out); return; case 14: commit bd0e542525d41d9ebe51cbcab8151d65eb984b2e Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 09:33:30 2018 -0700 [cmap] Simplify collect_unicodes() Don't use accelerator (almost). Hooks up Format13 as well. 
diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 925101f7..2f88a767 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -283,17 +283,6 @@ struct CmapSubtableFormat4 return *glyph != 0; } - static inline void collect_unicodes_func (const void *obj, hb_set_t *out) - { - const accelerator_t *thiz = (const accelerator_t *) obj; - for (unsigned int i = 0; i < thiz->segCount; i++) - { - if (thiz->startCount[i] != 0xFFFFu - || thiz->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) - hb_set_add_range (out, thiz->startCount[i], thiz->endCount[i]); - } - } - const HBUINT16 *endCount; const HBUINT16 *startCount; const HBUINT16 *idDelta; @@ -309,6 +298,18 @@ struct CmapSubtableFormat4 accel.init (this); return accel.get_glyph_func (&accel, codepoint, glyph); } + inline void collect_unicodes (hb_set_t *out) const + { + unsigned int segCount = this->segCountX2 / 2; + const HBUINT16 *endCount = this->values; + const HBUINT16 *startCount = endCount + segCount + 1; + + for (unsigned int i = 0; i < segCount; i++) + { + if (startCount[i] != 0xFFFFu || endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF) + hb_set_add_range (out, startCount[i], endCount[i]); + } + } inline bool sanitize (hb_sanitize_context_t *c) const { @@ -690,6 +691,19 @@ struct CmapSubtable default: return false; } } + inline void collect_unicodes (hb_set_t *out) const + { + switch (u.format) { +// case 0: u.format0 .collect_unicodes (out); return; + case 4: u.format4 .collect_unicodes (out); return; +// case 6: u.format6 .collect_unicodes (out); return; +// case 10: u.format10.collect_unicodes (out); return; + case 12: u.format12.collect_unicodes (out); return; + case 13: u.format13.collect_unicodes (out); return; + case 14: + default: return; + } + } inline bool sanitize (hb_sanitize_context_t *c) const { @@ -901,9 +915,10 @@ struct cmap { this->blob = hb_sanitize_context_t().reference_table<cmap> (face); const cmap *table = this->blob->as<cmap> (); - const 
CmapSubtable *subtable = nullptr; const CmapSubtableFormat14 *subtable_uvs = nullptr; + subtable = nullptr; + bool symbol = false; /* 32-bit subtables. */ if (!subtable) subtable = table->find_subtable (3, 10); @@ -939,24 +954,20 @@ struct cmap if (unlikely (symbol)) { this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>; - this->collect_unicodes_func = collect_unicodes_func_nil; } else { switch (subtable->u.format) { /* Accelerate format 4 and format 12. */ default: this->get_glyph_func = get_glyph_from<CmapSubtable>; - this->collect_unicodes_func = collect_unicodes_func_nil; break; case 12: this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>; - this->collect_unicodes_func = collect_unicodes_from<CmapSubtableFormat12>; break; case 4: { this->format4_accel.init (&subtable->u.format4); this->get_glyph_data = &this->format4_accel; this->get_glyph_func = this->format4_accel.get_glyph_func; - this->collect_unicodes_func = this->format4_accel.collect_unicodes_func; } break; } @@ -992,7 +1003,7 @@ struct cmap inline void collect_unicodes (hb_set_t *out) const { - this->collect_unicodes_func (get_glyph_data, out); + subtable->collect_unicodes (out); } protected: @@ -1002,11 +1013,6 @@ struct cmap typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj, hb_set_t *out); - static inline void collect_unicodes_func_nil (const void *obj, hb_set_t *out) - { - // NOOP - } - template <typename Type> static inline bool get_glyph_from (const void *obj, hb_codepoint_t codepoint, @@ -1047,9 +1053,9 @@ struct cmap } private: + const CmapSubtable *subtable; hb_cmap_get_glyph_func_t get_glyph_func; const void *get_glyph_data; - hb_cmap_collect_unicodes_func_t collect_unicodes_func; CmapSubtableFormat4::accelerator_t format4_accel; commit d60c465627d76fcfbeb37d6b8f9382f3b84ace6e Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 08:47:06 2018 -0700 Rename get_all_codepoints() to collect_unicodes() diff --git a/src/hb-ot-cmap-table.hh 
b/src/hb-ot-cmap-table.hh index 8529c6a3..925101f7 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -283,7 +283,7 @@ struct CmapSubtableFormat4 return *glyph != 0; } - static inline void get_all_codepoints_func (const void *obj, hb_set_t *out) + static inline void collect_unicodes_func (const void *obj, hb_set_t *out) { const accelerator_t *thiz = (const accelerator_t *) obj; for (unsigned int i = 0; i < thiz->segCount; i++) @@ -436,7 +436,7 @@ struct CmapSubtableLongSegmented return *glyph != 0; } - inline void get_all_codepoints (hb_set_t *out) const + inline void collect_unicodes (hb_set_t *out) const { for (unsigned int i = 0; i < this->groups.len; i++) { hb_set_add_range (out, @@ -939,24 +939,24 @@ struct cmap if (unlikely (symbol)) { this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>; - this->get_all_codepoints_func = null_get_all_codepoints_func; + this->collect_unicodes_func = collect_unicodes_func_nil; } else { switch (subtable->u.format) { /* Accelerate format 4 and format 12. 
*/ default: this->get_glyph_func = get_glyph_from<CmapSubtable>; - this->get_all_codepoints_func = null_get_all_codepoints_func; + this->collect_unicodes_func = collect_unicodes_func_nil; break; case 12: this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>; - this->get_all_codepoints_func = get_all_codepoints_from<CmapSubtableFormat12>; + this->collect_unicodes_func = collect_unicodes_from<CmapSubtableFormat12>; break; case 4: { this->format4_accel.init (&subtable->u.format4); this->get_glyph_data = &this->format4_accel; this->get_glyph_func = this->format4_accel.get_glyph_func; - this->get_all_codepoints_func = this->format4_accel.get_all_codepoints_func; + this->collect_unicodes_func = this->format4_accel.collect_unicodes_func; } break; } @@ -990,19 +990,19 @@ struct cmap return get_nominal_glyph (unicode, glyph); } - inline void get_all_codepoints (hb_set_t *out) const + inline void collect_unicodes (hb_set_t *out) const { - this->get_all_codepoints_func (get_glyph_data, out); + this->collect_unicodes_func (get_glyph_data, out); } protected: typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph); - typedef void (*hb_cmap_get_all_codepoints_func_t) (const void *obj, + typedef void (*hb_cmap_collect_unicodes_func_t) (const void *obj, hb_set_t *out); - static inline void null_get_all_codepoints_func (const void *obj, hb_set_t *out) + static inline void collect_unicodes_func_nil (const void *obj, hb_set_t *out) { // NOOP } @@ -1017,11 +1017,11 @@ struct cmap } template <typename Type> - static inline void get_all_codepoints_from (const void *obj, + static inline void collect_unicodes_from (const void *obj, hb_set_t *out) { const Type *typed_obj = (const Type *) obj; - typed_obj->get_all_codepoints (out); + typed_obj->collect_unicodes (out); } template <typename Type> @@ -1049,7 +1049,7 @@ struct cmap private: hb_cmap_get_glyph_func_t get_glyph_func; const void *get_glyph_data; - 
hb_cmap_get_all_codepoints_func_t get_all_codepoints_func; + hb_cmap_collect_unicodes_func_t collect_unicodes_func; CmapSubtableFormat4::accelerator_t format4_accel; diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 88f6d678..2e991de2 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -237,15 +237,15 @@ hb_subset (hb_face_t *source, } /** - * hb_subset_get_all_codepoints: + * hb_subset_collect_unicodes: * @source: font face data to load. * @out: set to add the all codepoints covered by font face, source. */ void -hb_subset_get_all_codepoints (hb_face_t *source, hb_set_t *out) +hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out) { OT::cmap::accelerator_t cmap; cmap.init (source); - cmap.get_all_codepoints (out); + cmap.collect_unicodes (out); cmap.fini(); } diff --git a/src/hb-subset.h b/src/hb-subset.h index f6d2ae0a..b79b8094 100644 --- a/src/hb-subset.h +++ b/src/hb-subset.h @@ -80,9 +80,9 @@ hb_subset (hb_face_t *source, hb_subset_profile_t *profile, hb_subset_input_t *input); -/* hb_subset_get_all_codepoints */ +/* hb_subset_collect_unicodes */ HB_EXTERN void -hb_subset_get_all_codepoints (hb_face_t *source, hb_set_t *out); +hb_subset_collect_unicodes (hb_face_t *source, hb_set_t *out); HB_END_DECLS diff --git a/test/api/test-subset-codepoints.c b/test/api/test-subset-codepoints.c index 3bd1fe06..a2c40cd6 100644 --- a/test/api/test-subset-codepoints.c +++ b/test/api/test-subset-codepoints.c @@ -28,12 +28,12 @@ #include "hb-subset-test.h" static void -test_get_all_codepoints_format4 (void) +test_collect_unicodes_format4 (void) { hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.format4.ttf"); hb_set_t *codepoints = hb_set_create(); - hb_subset_get_all_codepoints (face, codepoints); + hb_subset_collect_unicodes (face, codepoints); hb_codepoint_t cp = HB_SET_VALUE_INVALID; g_assert (hb_set_next (codepoints, &cp)); @@ -49,12 +49,12 @@ test_get_all_codepoints_format4 (void) } static void -test_get_all_codepoints_format12 (void) 
+test_collect_unicodes_format12 (void) { hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.format12.ttf"); hb_set_t *codepoints = hb_set_create(); - hb_subset_get_all_codepoints (face, codepoints); + hb_subset_collect_unicodes (face, codepoints); hb_codepoint_t cp = HB_SET_VALUE_INVALID; g_assert (hb_set_next (codepoints, &cp)); @@ -70,12 +70,12 @@ test_get_all_codepoints_format12 (void) } static void -test_get_all_codepoints (void) +test_collect_unicodes (void) { hb_face_t *face = hb_subset_test_open_font("fonts/Roboto-Regular.abc.ttf"); hb_set_t *codepoints = hb_set_create(); - hb_subset_get_all_codepoints (face, codepoints); + hb_subset_collect_unicodes (face, codepoints); hb_codepoint_t cp = HB_SET_VALUE_INVALID; g_assert (hb_set_next (codepoints, &cp)); @@ -95,9 +95,9 @@ main (int argc, char **argv) { hb_test_init (&argc, &argv); - hb_test_add (test_get_all_codepoints); - hb_test_add (test_get_all_codepoints_format4); - hb_test_add (test_get_all_codepoints_format12); + hb_test_add (test_collect_unicodes); + hb_test_add (test_collect_unicodes_format4); + hb_test_add (test_collect_unicodes_format12); return hb_test_run(); } diff --git a/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc b/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc index 38f338ba..bcdafebd 100644 --- a/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc +++ b/test/fuzzing/hb-subset-get-codepoints-fuzzer.cc @@ -13,7 +13,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) hb_face_t *face = hb_face_create (blob, 0); hb_set_t *output = hb_set_create(); - hb_subset_get_all_codepoints (face, output); + hb_subset_collect_unicodes (face, output); hb_set_destroy (output); hb_face_destroy (face); commit 531051b8b904cf4eb6a50bacebc11c2d85e40140 Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 08:44:18 2018 -0700 [ot-font] Return char-not-found if mapped to gid 0 OpenType cmap table can use gid=0 to mean "not covered" to produce more optimized cmap 
subtables. Return false from get_nominal_glyph() for those. hb-ft already does this. diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh index 67a9c7dd..8529c6a3 100644 --- a/src/hb-ot-cmap-table.hh +++ b/src/hb-ot-cmap-table.hh @@ -52,7 +52,7 @@ struct CmapSubtableFormat0 if (!gid) return false; *glyph = gid; - return true; + return *glyph != 0; } inline bool sanitize (hb_sanitize_context_t *c) const @@ -280,7 +280,7 @@ struct CmapSubtableFormat4 } *glyph = gid & 0xFFFFu; - return true; + return *glyph != 0; } static inline void get_all_codepoints_func (const void *obj, hb_set_t *out) @@ -398,7 +398,7 @@ struct CmapSubtableTrimmed if (!gid) return false; *glyph = gid; - return true; + return *glyph != 0; } inline bool sanitize (hb_sanitize_context_t *c) const @@ -433,7 +433,7 @@ struct CmapSubtableLongSegmented if (i == -1) return false; *glyph = T::group_get_glyph (groups[i], codepoint); - return true; + return *glyph != 0; } inline void get_all_codepoints (hb_set_t *out) const commit aadb2a9188f143126392c0b0f139326dcf7f9c8d Author: Behdad Esfahbod <beh...@behdad.org> Date: Sat Aug 25 08:18:53 2018 -0700 Add face-builder New API: +hb_face_builder_create +hb_face_builder_add_table diff --git a/docs/harfbuzz-sections.txt b/docs/harfbuzz-sections.txt index 16b66277..f98e8cc1 100644 --- a/docs/harfbuzz-sections.txt +++ b/docs/harfbuzz-sections.txt @@ -170,6 +170,7 @@ hb_coretext_font_get_ct_font <SECTION> <FILE>hb-face</FILE> hb_face_count +hb_face_t hb_face_create hb_face_create_for_tables hb_face_destroy @@ -188,7 +189,8 @@ hb_face_set_glyph_count hb_face_set_index hb_face_set_upem hb_face_set_user_data -hb_face_t +hb_face_builder_create +hb_face_builder_add_table </SECTION> <SECTION> @@ -226,6 +228,7 @@ hb_font_get_glyph hb_font_get_glyph_advance_for_direction hb_font_get_glyph_advance_func_t hb_font_get_glyph_advances_for_direction +hb_font_get_glyph_advances_func_t hb_font_get_glyph_contour_point hb_font_get_glyph_contour_point_for_origin 
hb_font_get_glyph_contour_point_func_t diff --git a/src/hb-face.cc b/src/hb-face.cc index 49f29d3f..9cb0f265 100644 --- a/src/hb-face.cc +++ b/src/hb-face.cc @@ -512,3 +512,152 @@ hb_face_get_table_tags (const hb_face_t *face, return ot_face.get_table_tags (start_offset, table_count, table_tags); } + + +/* + * face-builder: A face that has add_table(). + */ + +struct hb_face_builder_data_t +{ + struct table_entry_t + { + inline int cmp (const hb_tag_t *t) const + { + if (*t < tag) return -1; + if (*t > tag) return -1; + return 0; + } + + hb_tag_t tag; + hb_blob_t *blob; + }; + + hb_vector_t<table_entry_t, 32> tables; +}; + +static hb_face_builder_data_t * +_hb_face_builder_data_create (void) +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) calloc (1, sizeof (hb_face_builder_data_t)); + if (unlikely (!data)) + return nullptr; + + data->tables.init (); + + return data; +} + +static void +_hb_face_builder_data_destroy (void *user_data) +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data; + + for (unsigned int i = 0; i < data->tables.len; i++) + hb_blob_destroy (data->tables[i].blob); + + data->tables.fini (); + + free (data); +} + +static hb_blob_t * +_hb_face_builder_data_reference_blob (hb_face_builder_data_t *data) +{ + + unsigned int table_count = data->tables.len; + unsigned int face_length = table_count * 16 + 12; + + for (unsigned int i = 0; i < table_count; i++) + face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables.arrayZ[i].blob)); + + char *buf = (char *) malloc (face_length); + if (unlikely (!buf)) + return nullptr; + + hb_serialize_context_t c (buf, face_length); + OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> (); + + bool is_cff = data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2')); + hb_tag_t sfnt_tag = is_cff ? 
OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag; + + Supplier<hb_tag_t> tags_supplier (&data->tables[0].tag, table_count, sizeof (data->tables[0])); + Supplier<hb_blob_t *> blobs_supplier (&data->tables[0].blob, table_count, sizeof (data->tables[0])); + bool ret = f->serialize_single (&c, + sfnt_tag, + tags_supplier, + blobs_supplier, + table_count); + + c.end_serialize (); + + if (unlikely (!ret)) + { + free (buf); + return nullptr; + } + + return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free); +} + +static hb_blob_t * +_hb_face_builder_reference_table (hb_face_t *face, hb_tag_t tag, void *user_data) +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data; + + if (!tag) + return _hb_face_builder_data_reference_blob (data); + + hb_face_builder_data_t::table_entry_t *entry = data->tables.lsearch (tag); + if (entry) + return hb_blob_reference (entry->blob); + + return nullptr; +} + + +/** + * hb_face_builder_create: + * + * Creates a #hb_face_t that can be used with hb_face_builder_add_table(). + * After tables are added to the face, it can be compiled to a binary + * font file by calling hb_face_reference_blob(). + * + * Return value: (transfer full) New face. + * + * Since: REPLACEME + **/ +hb_face_t * +hb_face_builder_create (void) +{ + hb_face_builder_data_t *data = _hb_face_builder_data_create (); + if (unlikely (!data)) return hb_face_get_empty (); + + return hb_face_create_for_tables (_hb_face_builder_reference_table, + data, + _hb_face_builder_data_destroy); +} + +/** + * hb_face_builder_add_table: + * + * Add table for @tag with data provided by @blob to the face. @face must + * be created using hb_face_builder_create(). 
+ * + * Since: REPLACEME + **/ +hb_bool_t +hb_face_builder_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob) +{ + if (unlikely (face->destroy != (hb_destroy_func_t) _hb_face_builder_data_destroy)) + return false; + + hb_face_builder_data_t *data = (hb_face_builder_data_t *) face->user_data; + hb_face_builder_data_t::table_entry_t *entry = data->tables.push (); + + entry->tag = tag; + entry->blob = hb_blob_reference (blob); + + return true; +} + diff --git a/src/hb-face.h b/src/hb-face.h index 208092ef..2bc3e895 100644 --- a/src/hb-face.h +++ b/src/hb-face.h @@ -120,6 +120,20 @@ hb_face_get_table_tags (const hb_face_t *face, unsigned int *table_count, /* IN/OUT */ hb_tag_t *table_tags /* OUT */); + +/* + * Builder face. + */ + +HB_EXTERN hb_face_t * +hb_face_builder_create (void); + +HB_EXTERN hb_bool_t +hb_face_builder_add_table (hb_face_t *face, + hb_tag_t tag, + hb_blob_t *blob); + + HB_END_DECLS #endif /* HB_FACE_H */ diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index 12566827..6e556c20 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -154,7 +154,7 @@ hb_subset_plan_create (hb_face_t *face, plan->unicodes = hb_set_create(); plan->glyphs.init(); plan->source = hb_face_reference (face); - plan->dest = hb_subset_face_create (); + plan->dest = hb_face_builder_create (); plan->codepoint_to_glyph = hb_map_create(); plan->glyph_map = hb_map_create(); diff --git a/src/hb-subset-plan.hh b/src/hb-subset-plan.hh index 7501294d..b7f14d2e 100644 --- a/src/hb-subset-plan.hh +++ b/src/hb-subset-plan.hh @@ -89,7 +89,7 @@ struct hb_subset_plan_t hb_blob_get_length (contents), hb_blob_get_length (source_blob)); hb_blob_destroy (source_blob); - return hb_subset_face_add_table(dest, tag, contents); + return hb_face_builder_add_table (dest, tag, contents); } }; diff --git a/src/hb-subset-private.hh b/src/hb-subset-private.hh index 6b2b207f..42c93d7d 100644 --- a/src/hb-subset-private.hh +++ b/src/hb-subset-private.hh @@ -34,8 +34,6 @@ #include 
"hb-font-private.hh" -typedef struct hb_subset_face_data_t hb_subset_face_data_t; - struct hb_subset_input_t { hb_object_header_t header; ASSERT_POD (); @@ -54,10 +52,5 @@ struct hb_subset_input_t { */ }; -HB_INTERNAL hb_face_t * -hb_subset_face_create (void); - -HB_INTERNAL hb_bool_t -hb_subset_face_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob); #endif /* HB_SUBSET_PRIVATE_HH */ diff --git a/src/hb-subset.cc b/src/hb-subset.cc index 411c6b86..88f6d678 100644 --- a/src/hb-subset.cc +++ b/src/hb-subset.cc @@ -96,135 +96,6 @@ _subset (hb_subset_plan_t *plan) } -/* - * A face that has add_table(). - */ - -struct hb_subset_face_data_t -{ - struct table_entry_t - { - inline int cmp (const hb_tag_t *t) const - { - if (*t < tag) return -1; - if (*t > tag) return -1; - return 0; - } - - hb_tag_t tag; - hb_blob_t *blob; - }; - - hb_vector_t<table_entry_t, 32> tables; -}; - -static hb_subset_face_data_t * -_hb_subset_face_data_create (void) -{ - hb_subset_face_data_t *data = (hb_subset_face_data_t *) calloc (1, sizeof (hb_subset_face_data_t)); - if (unlikely (!data)) - return nullptr; - - data->tables.init (); - - return data; -} - -static void -_hb_subset_face_data_destroy (void *user_data) -{ - hb_subset_face_data_t *data = (hb_subset_face_data_t *) user_data; - - for (unsigned int i = 0; i < data->tables.len; i++) - hb_blob_destroy (data->tables[i].blob); - - data->tables.fini (); - - free (data); -} - -static hb_blob_t * -_hb_subset_face_data_reference_blob (hb_subset_face_data_t *data) -{ - - unsigned int table_count = data->tables.len; - unsigned int face_length = table_count * 16 + 12; - - for (unsigned int i = 0; i < table_count; i++) - face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables.arrayZ[i].blob)); - - char *buf = (char *) malloc (face_length); - if (unlikely (!buf)) - return nullptr; - - hb_serialize_context_t c (buf, face_length); - OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> (); - - bool is_cff = 
data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2')); - hb_tag_t sfnt_tag = is_cff ? OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag; - - Supplier<hb_tag_t> tags_supplier (&data->tables[0].tag, table_count, sizeof (data->tables[0])); - Supplier<hb_blob_t *> blobs_supplier (&data->tables[0].blob, table_count, sizeof (data->tables[0])); - bool ret = f->serialize_single (&c, - sfnt_tag, - tags_supplier, - blobs_supplier, - table_count); - - c.end_serialize (); - - if (unlikely (!ret)) - { - free (buf); - return nullptr; - } - - return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free); -} - -static hb_blob_t * -_hb_subset_face_reference_table (hb_face_t *face, hb_tag_t tag, void *user_data) -{ - hb_subset_face_data_t *data = (hb_subset_face_data_t *) user_data; - - if (!tag) - return _hb_subset_face_data_reference_blob (data); - - hb_subset_face_data_t::table_entry_t *entry = data->tables.lsearch (tag); - if (entry) - return hb_blob_reference (entry->blob); - - return nullptr; -} - -/* TODO: Move this to hb-face.h and rename to hb_face_builder_create() - * with hb_face_builder_add_table(). 
*/ -hb_face_t * -hb_subset_face_create (void) -{ - hb_subset_face_data_t *data = _hb_subset_face_data_create (); - if (unlikely (!data)) return hb_face_get_empty (); - - return hb_face_create_for_tables (_hb_subset_face_reference_table, - data, - _hb_subset_face_data_destroy); -} - -hb_bool_t -hb_subset_face_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob) -{ - if (unlikely (face->destroy != (hb_destroy_func_t) _hb_subset_face_data_destroy)) - return false; - - hb_subset_face_data_t *data = (hb_subset_face_data_t *) face->user_data; - hb_subset_face_data_t::table_entry_t *entry = data->tables.push (); - - entry->tag = tag; - entry->blob = hb_blob_reference (blob); - - return true; -} - static bool _subset_table (hb_subset_plan_t *plan, hb_tag_t tag) commit 6cac9dc9ccb76945e9dfacafc169afad4a6e1e88 Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 24 10:29:45 2018 -0700 [blob] Add as_bytes() diff --git a/src/hb-blob-private.hh b/src/hb-blob-private.hh index 49ad68ec..0d3fad57 100644 --- a/src/hb-blob-private.hh +++ b/src/hb-blob-private.hh @@ -62,6 +62,10 @@ struct hb_blob_t { return unlikely (!data) ? &Null(Type) : reinterpret_cast<const Type *> (data); } + inline hb_bytes_t as_bytes (void) const + { + return hb_bytes_t (data, length); + } public: hb_object_header_t header; commit 29a9a0883877c598413de78cd0c61f07bc393b2c Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 24 10:24:45 2018 -0700 [sanitize] Document how sanitize machinery works diff --git a/src/hb-machinery-private.hh b/src/hb-machinery-private.hh index 05add1f4..b33dd996 100644 --- a/src/hb-machinery-private.hh +++ b/src/hb-machinery-private.hh @@ -138,6 +138,68 @@ struct hb_dispatch_context_t /* * Sanitize + * + * + * === Introduction === + * + * The sanitize machinery is at the core of our zero-cost font loading. We + * mmap() font file into memory and create a blob out of it. Font subtables + * are returned as a readonly sub-blob of the main font blob. 
These table + * blobs are then sanitized before use, to ensure invalid memory access does + * not happen. The top-level sanitize API is used like this, e.g. to load the 'head' + * table: + * + * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face); + * + * The blob then can be converted to a head table struct with: + * + * const head *head_table = head_blob->as<head> (); + * + * What reference_table() does is call hb_face_reference_table() to load + * the table blob, sanitize it and return either the sanitized blob, or empty + * blob if sanitization failed. The blob->as() function returns the null + * object of its template type argument if the blob is empty. Otherwise, it + * just casts the blob contents to the desired type. + * + * Sanitizing a blob of data with a type T works as follows (with minor + * simplification): + * + * - Cast blob content to T*, call sanitize() method of it, + * - If sanitize succeeded, return blob. + * - Otherwise, if blob is not writable, try making it writable, + * or copy it if it cannot be made writable in-place, + * - Call sanitize() again. Return blob if sanitize succeeded. + * - Return empty blob otherwise. + * + * + * === The sanitize() contract === + * + * The sanitize() method of each object type shall return true if it's safe to + * call other methods of the object, and false otherwise. + * + * Note that what sanitize() checks for might align with what the specification + * describes as valid table data, but does not have to be. In particular, we + * do NOT want to be pedantic and concern ourselves with validity checks that + * are irrelevant to our use of the table. On the contrary, we want to be + * lenient with error handling and accept invalid data to the extent that it + * does not impose extra burden on us. + * + * Based on the sanitize contract, one can see that what we check for depends + * on how we use the data in other table methods. I.e.
if other table methods + * assume that offsets do NOT point out of the table data block, then that's + * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On + * the other hand, if other methods do such checks themselves, then sanitize() + * does not have to bother with them (glyf/loca work this way). The choice + * depends on the table structure and sanitize() performance. For example, to + * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard + * to avoid such costs during font loading. By postponing such checks to the + * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime + * cost to O(used-glyphs). As such, this is preferred. + * + * The same argument can be made re GSUB/GPOS/GDEF, but there, the table + * structure is so complicated that by checking all offsets at sanitize() time, + * we make the code much simpler in other methods, as offsets and referenced + * objects do not need to be validated at each use site. */ /* This limits sanitizing time on really broken fonts.
*/ commit 142ac5a6be6088771e0ee4b135ba753c80036a9a Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 24 10:07:49 2018 -0700 [serialize] Add copy_bytes() and copy_blob() diff --git a/src/hb-dsalgs.hh b/src/hb-dsalgs.hh index 8cbe6584..8d59c6cf 100644 --- a/src/hb-dsalgs.hh +++ b/src/hb-dsalgs.hh @@ -502,6 +502,9 @@ struct hb_bytes_t { inline hb_bytes_t (void) : bytes (nullptr), len (0) {} inline hb_bytes_t (const char *bytes_, unsigned int len_) : bytes (bytes_), len (len_) {} + inline hb_bytes_t (const void *bytes_, unsigned int len_) : bytes ((const char *) bytes_), len (len_) {} + + inline void free (void) { ::free ((void *) bytes); bytes = nullptr; len = 0; } inline int cmp (const hb_bytes_t &a) const { diff --git a/src/hb-machinery-private.hh b/src/hb-machinery-private.hh index 99ef485a..05add1f4 100644 --- a/src/hb-machinery-private.hh +++ b/src/hb-machinery-private.hh @@ -402,7 +402,7 @@ struct hb_serialize_context_t } template <typename Type> - inline Type *copy (void) + inline Type *copy (void) const { assert (!this->ran_out_of_room); unsigned int len = this->head - this->start; @@ -411,6 +411,25 @@ struct hb_serialize_context_t memcpy (p, this->start, len); return reinterpret_cast<Type *> (p); } + inline hb_bytes_t copy_bytes (void) const + { + assert (!this->ran_out_of_room); + unsigned int len = this->head - this->start; + void *p = malloc (len); + if (p) + memcpy (p, this->start, len); + else + return hb_bytes_t (); + return hb_bytes_t (p, len); + } + inline hb_blob_t *copy_blob (void) const + { + assert (!this->ran_out_of_room); + return hb_blob_create (this->start, + this->head - this->start, + HB_MEMORY_MODE_DUPLICATE, + nullptr, nullptr); + } template <typename Type> inline Type *allocate_size (unsigned int size) _______________________________________________ HarfBuzz mailing list HarfBuzz@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/harfbuzz