This is an automated email from the git hooks/post-receive script. rene pushed a commit to branch master in repository graphite2.
commit bf5fc4858bc9318688b5234551e738917ce8ae42 Author: Rene Engelhard <[email protected]> Date: Thu Apr 21 14:48:42 2016 +0200 Imported Upstream version 1.0.3.real --- .hg_archival.txt | 4 +- .hgtags | 4 + contrib/android/jni/Android.mk | 4 +- contrib/android/jni/graphite/Android.mk | 2 +- contrib/android/jni/graphite_layer.cpp | 6 +- contrib/android/jni/loadgr_jni.cpp | 2 +- .../src/org/sil/palaso/helloworld/HelloWorld.java | 2 +- src/CMakeLists.txt | 1 + src/CmapCache.cpp | 46 +- src/CmapCache.h | 47 +- src/Face.cpp | 21 +- src/Face.h | 8 +- src/Main.h | 2 +- src/NameTable.cpp | 50 +-- src/SegCacheStore.cpp | 21 +- src/Segment.cpp | 120 +---- src/{CmapCache.h => UtfCodec.cpp} | 37 +- src/UtfCodec.h | 208 +++++++++ src/files.mk | 4 +- src/gr_segment.cpp | 81 ++-- src/processUTF.h | 494 --------------------- tests/CMakeLists.txt | 1 + tests/segcache/segcachetest.cpp | 42 +- tests/utftest/CMakeLists.txt | 15 + tests/utftest/utftest.cpp | 56 +++ tests/vm/CMakeLists.txt | 1 + 26 files changed, 505 insertions(+), 774 deletions(-) diff --git a/.hg_archival.txt b/.hg_archival.txt index 2db55cc..2d78a37 100644 --- a/.hg_archival.txt +++ b/.hg_archival.txt @@ -1,5 +1,5 @@ repo: 999e2033695c3bcf2f65d611737ac9008805bd58 -node: cb735be7d86d894f0667cb63dffc4273fd53d9fe +node: 418e55d88178b9bd870bab38be8768aecb743829 branch: default latesttag: 1.0.3 -latesttagdistance: 2 +latesttagdistance: 1 diff --git a/.hgtags b/.hgtags index 094d794..82d1a33 100644 --- a/.hgtags +++ b/.hgtags @@ -14,3 +14,7 @@ bedb05f72d56f24ca0fc333fd14eabb1ec553902 1.0.1 0fa690ff089ce0bc382a553cc01c0b721fbdee5c 1.0.2 b10bcaf1302411513a5961d1854ff8c02e5ad5e6 1.0.2 8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3 +8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3 +0000000000000000000000000000000000000000 1.0.3 +0000000000000000000000000000000000000000 1.0.3 +f148746a0d99d2f9bc050906ce78815565a0d0b4 1.0.3 diff --git a/contrib/android/jni/Android.mk b/contrib/android/jni/Android.mk index 7ae1d28..224e329 100644 --- a/contrib/android/jni/Android.mk +++ b/contrib/android/jni/Android.mk @@ -28,9 +28,9 @@ LOCAL_PATH := $(call my-dir) MY_ANDROID_SRC := $(HOME)/Work/android/android-src MY_ANDROID_LIBS := $(MY_ANDROID_SRC)/out/target/product/generic/symbols/system/lib -#MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib +MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib MY_SKIA := $(MY_ANDROID_SRC)/external/skia -#MY_SKIA := $(HOME)/Work/android/skia/8 +MY_SKIA := $(HOME)/Work/android/skia/8 include $(CLEAR_VARS) diff --git a/contrib/android/jni/graphite/Android.mk b/contrib/android/jni/graphite/Android.mk index 9a5990a..ffb6d3e 100644 --- a/contrib/android/jni/graphite/Android.mk +++ b/contrib/android/jni/graphite/Android.mk @@ -11,7 +11,7 @@ include ../../src/files.mk LOCAL_MODULE := graphite2 #LOCAL_SRC_FILES := $(foreach v,$(GR2_SOURCES),./$(v)) LOCAL_SRC_FILES := $(GR2_SOURCES) -LOCAL_C_INCLUDES := ../../include +LOCAL_C_INCLUDES := ../../include ../../src LOCAL_EXPORT_C_INCLUDES := ../../include #LOCAL_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include #LOCAL_EXPORT_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include diff --git a/contrib/android/jni/graphite_layer.cpp b/contrib/android/jni/graphite_layer.cpp index 425e9e4..29b1a5f 100644 --- a/contrib/android/jni/graphite_layer.cpp +++ b/contrib/android/jni/graphite_layer.cpp @@ -448,11 +448,11 @@ func_map thismap[] = { { "_ZN8SkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", "_ZN10mySkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", 0, 0 }, // SkTypeface::CreateFromName mySkTypeface::CreateFromName { "_ZN10SkTypeface14CreateFromNameEPKcNS_5StyleE", "_ZN12mySkTypeface14CreateFromNameEPKcN10SkTypeface5StyleE", 0, 0 }, - // SkPaint::measureText SkPaint::measureText + // SkPaint::measureText mySkPaint::measureText { "_ZNK7SkPaint11measureTextEPKvjP6SkRectf", "_ZNK9mySkPaint11measureTextEPKvjP6SkRectf", 0, 0 }, - // SkPaint::measureText SkPaint::measureText + // SkPaint::measureText mySkPaint::measureText { "_ZNK7SkPaint11measureTextEPKvj", "_ZNK9mySkPaint11measureTextEPKvj", 0, 0}, - // SkPaint::getTextWidths + // SkPaint::getTextWidths mySkPaint::getTextWidths { "_ZNK7SkPaint13getTextWidthsEPKvjPfP6SkRect", "_ZNK9mySkPaint13getTextWidthsEPKvjPfP6SkRect", 0, 0} }; diff --git a/contrib/android/jni/loadgr_jni.cpp b/contrib/android/jni/loadgr_jni.cpp index 5b6e3b0..64a9ecf 100644 --- a/contrib/android/jni/loadgr_jni.cpp +++ b/contrib/android/jni/loadgr_jni.cpp @@ -180,7 +180,7 @@ extern "C" jobject Java_org_sil_palaso_Graphite_addFontResource( JNIEnv *env, jo f->next = myfonts; f->tf = tf; f->name = rtl ? "" : name; - f->rtl = rtl ? 3 : 0; + f->rtl = rtl ? 7 : 0; if (!gFTLibrary && FT_Init_FreeType(&gFTLibrary)) { delete f->tf; diff --git a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java index 57ee06a..b1a3922 100644 --- a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java +++ b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java @@ -47,7 +47,7 @@ public class HelloWorld extends Activity { TextView tv; WebView wv; // String s = "မဂင်္ဂလာ|မဘ္ဘာ၊ ဤကဲ့|သို့|ရာ|ဇ|ဝင်|တင်|မည့် ကြေ|ညာ|ချက်|ကို ပြု|လုပ်|ပြီး|နောက် ဤညီ|လာ|ခံ|အ|စည်း|အ|ဝေး|ကြီး|က ကမ္ဘာ့|ကု|လ|သ|မဂ္ဂ|အ|ဖွဲ့|ဝင် နိုင်|ငံ အား|လုံး|အား ထို|ကြေ|ညာ|စာ|တမ်း|ကြီး၏ စာ|သား|ကို|အ|များ|ပြည်|သူ|တို့ ကြား|သိ|စေ|ရန် ကြေ|ညာ|ပါ|မည့် အ|ကြောင်း|ကို|လည်း|ကောင်း၊ ထို့|ပြင်|နိုင်|ငံ|များ၊ သို့|တည်း|မ|ဟုတ် နယ်|မြေ|များ၏ နိုင်|ငံ|ရေး အ|ဆင့်|အ|တ|န်း|ကို လိုက်၍ ခွဲ|ခြား|ခြင်း မ|� ��ြု|ဘဲ|အ|ဓိ|က|အား|ဖြင့် စာ|သင်|ကျောင်း|များ|နှင့် အ|ခြား|ပ|ညာ|ရေး အ|ဖွဲ့|အ|စည်း|များ|တ [...] - String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة البشرية وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم."; + String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة (البشرية) وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم."; String w = "\uFEFF<html><body style=\"font-family: Scheh\">Test: " + s + "</body></html>"; // <3> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0d50933..d47ce04 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,6 +85,7 @@ add_library(graphite2 SHARED Slot.cpp Sparse.cpp TtfUtil.cpp + UtfCodec.cpp XmlTraceLog.cpp XmlTraceLogTags.cpp) diff --git a/src/CmapCache.cpp b/src/CmapCache.cpp index 0fc85ee..dc2e43e 100644 --- a/src/CmapCache.cpp +++ b/src/CmapCache.cpp @@ -39,8 +39,8 @@ CmapCache::CmapCache(const void* cmapTable, size_t length) const void * table310 = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, length); m_isBmpOnly = (!table310); int rangeKey = 0; - unsigned int codePoint = 0; - unsigned int prevCodePoint = 0; + uint32 codePoint = 0, + prevCodePoint = 0; if (table310 && TtfUtil::CheckCmap310Subtable(table310)) { m_blocks = grzeroalloc<uint16*>(0x1100); @@ -92,12 +92,50 @@ CmapCache::CmapCache(const void* cmapTable, size_t length) } } -CmapCache::~CmapCache() +CmapCache::~CmapCache() throw() { unsigned int numBlocks = (m_isBmpOnly)? 0x100 : 0x1100; for (unsigned int i = 0; i < numBlocks; i++) free(m_blocks[i]); free(m_blocks); - m_blocks = NULL; +} + +uint16 CmapCache::operator [] (const uint32 usv) const throw() +{ + if ((m_isBmpOnly && usv > 0xFFFF) || (usv > 0x10FFFF)) + return 0; + const uint32 block = 0xFFFF & (usv >> 8); + if (m_blocks[block]) + return m_blocks[block][usv & 0xFF]; + return 0; +}; + +CmapCache::operator bool() const throw() +{ + return m_blocks; +} + + +DirectCmap::DirectCmap(const void* cmap, size_t length) +{ + _ctable = TtfUtil::FindCmapSubtable(cmap, 3, 1, length); + if (!_ctable || !TtfUtil::CheckCmap31Subtable(_ctable)) + { + _ctable = 0; + return; + } + _stable = TtfUtil::FindCmapSubtable(cmap, 3, 10, length); + if (_stable && !TtfUtil::CheckCmap310Subtable(_stable)) + _stable = 0; +} + +uint16 DirectCmap::operator [] (const uint32 usv) const throw() +{ + return usv > 0xFFFF ? (_stable ? TtfUtil::Cmap310Lookup(_stable, usv) : 0) : TtfUtil::Cmap31Lookup(_ctable, usv); +} + +DirectCmap::operator bool () const throw() +{ + return _ctable; } diff --git a/src/CmapCache.h b/src/CmapCache.h index dc1603b..1facde3 100644 --- a/src/CmapCache.h +++ b/src/CmapCache.h @@ -26,24 +26,45 @@ of the License or (at your option) any later version. */ #pragma once -#include <graphite2/Types.h> +#include <Main.h> namespace graphite2 { -class CmapCache +class Face; + +class Cmap +{ +public: + virtual ~Cmap() throw() {} + + virtual uint16 operator [] (const uint32) const throw() { return 0; } + + virtual operator bool () const throw() { return false; } + + CLASS_NEW_DELETE; +}; + +class DirectCmap : public Cmap +{ +public: + DirectCmap(const void* cmap, size_t length); + virtual uint16 operator [] (const uint32 usv) const throw(); + virtual operator bool () const throw(); + + CLASS_NEW_DELETE; +private: + const void *_stable, + *_ctable; +}; + +class CmapCache : public Cmap { public: - CmapCache(const void * cmapTable, size_t length); - ~CmapCache(); - uint16 lookup(unsigned int unicode) const { - if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF)) - return 0; - unsigned int block = (0xFFFFFF & unicode) >> 8; - if (m_blocks && m_blocks[block]) - return m_blocks[block][unicode & 0xFF]; - return 0; - }; - CLASS_NEW_DELETE + CmapCache(const void * cmapTable, size_t length); + virtual ~CmapCache() throw(); + virtual uint16 operator [] (const uint32 usv) const throw(); + virtual operator bool () const throw(); + CLASS_NEW_DELETE; private: bool m_isBmpOnly; uint16 ** m_blocks; diff --git a/src/Face.cpp b/src/Face.cpp index 0c0af0c..060a4da 100644 --- a/src/Face.cpp +++ b/src/Face.cpp @@ -40,10 +40,10 @@ using namespace graphite2; Face::~Face() { delete m_pGlyphFaceCache; - delete m_cmapCache; + delete m_cmap; delete[] m_silfs; m_pGlyphFaceCache = NULL; - m_cmapCache = NULL; + m_cmap = NULL; m_silfs = NULL; delete m_pFileFace; delete m_pNames; @@ -58,13 +58,18 @@ bool Face::readGlyphs(unsigned int faceOptions) m_pGlyphFaceCache = GlyphFaceCache::makeCache(hdr); if (!m_pGlyphFaceCache) return false; + + size_t length = 0; + const byte * table = getTable(Tag::cmap, &length); + if (!table) return false; + if (faceOptions & gr_face_cacheCmap) - { - size_t length = 0; - const byte * table = getTable(Tag::cmap, &length); - if (!table) return false; - m_cmapCache = new CmapCache(table, length); - } + m_cmap = new CmapCache(table, length); + else + m_cmap = new DirectCmap(table, length); + + if (!m_cmap || !*m_cmap) return false; + if (faceOptions & gr_face_preloadGlyphs) { m_pGlyphFaceCache->loadAllGlyphs(); diff --git a/src/Face.h b/src/Face.h index b1a0fab..461a1bc 100644 --- a/src/Face.h +++ b/src/Face.h @@ -46,7 +46,7 @@ namespace graphite2 { class Segment; class FeatureVal; class NameTable; -class CmapCache; +class Cmap; using TtfUtil::Tag; @@ -116,7 +116,7 @@ public: public: Face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn getTable2) : m_appFaceHandle(appFaceHandle), m_getTable(getTable2), m_pGlyphFaceCache(NULL), - m_cmapCache(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL), + m_cmap(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL), m_pNames(NULL) {} virtual ~Face(); public: @@ -142,7 +142,7 @@ public: const GlyphFaceCache* getGlyphFaceCache() const { return m_pGlyphFaceCache; } //never NULL void takeFileFace(FileFace* pFileFace/*takes ownership*/); - CmapCache * getCmapCache() const { return m_cmapCache; }; + Cmap & cmap() const { return *m_cmap; }; NameTable * nameTable() const; uint16 languageForLocale(const char * locale) const; @@ -156,7 +156,7 @@ private: // unsigned short m_readglyphs; // how many glyphs have we in m_glyphs? // unsigned short m_capacity; // how big is m_glyphs mutable GlyphFaceCache* m_pGlyphFaceCache; //owned - never NULL - mutable CmapCache* m_cmapCache; // cmap cache if available + mutable Cmap * m_cmap; // cmap cache if available unsigned short m_upem; // design units per em protected: unsigned short m_numSilf; // number of silf subtables in the silf table diff --git a/src/Main.h b/src/Main.h index 1cfd809..29dc4a7 100644 --- a/src/Main.h +++ b/src/Main.h @@ -63,7 +63,7 @@ template <typename T> T * grzeroalloc(size_t n) void operator delete (void * p) throw() { free(p);} \ void operator delete (void *, void *) throw() {} \ void operator delete[] (void * p)throw() { free(p); } \ - void operator delete[] (void *, void *) throw() {} \ + void operator delete[] (void *, void *) throw() {} #ifdef __GNUC__ #define GR_MAYBE_UNUSED __attribute__((unused)) diff --git a/src/NameTable.cpp b/src/NameTable.cpp index c471767..e82a5f7 100644 --- a/src/NameTable.cpp +++ b/src/NameTable.cpp @@ -28,8 +28,7 @@ of the License or (at your option) any later version. #include "Endian.h" #include "NameTable.h" -#include "processUTF.h" - +#include "UtfCodec.h" using namespace graphite2; @@ -146,42 +145,37 @@ void* NameTable::getName(uint16& languageId, uint16 nameId, gr_encform enc, uint return NULL; } utf16Length >>= 1; // in utf16 units - uint16 * utf16Name = gralloc<uint16>(utf16Length + 1); + utf16::codeunit_t * utf16Name = gralloc<utf16::codeunit_t>(utf16Length); const uint8* pName = m_nameData + offset; for (size_t i = 0; i < utf16Length; i++) { utf16Name[i] = be::read<uint16>(pName); } - utf16Name[utf16Length] = 0; - if (enc == gr_utf16) + switch (enc) { - length = utf16Length; - return utf16Name; - } - else if (enc == gr_utf8) + case gr_utf8: { - uint8* uniBuffer = gralloc<uint8>(3 * utf16Length + 1); - ToUtf8Processor processor(uniBuffer, 3 * utf16Length + 1); - IgnoreErrors ignore; - BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length)); - processUTF<BufferLimit, ToUtf8Processor, IgnoreErrors>(bufferLimit, &processor, &ignore); - length = processor.bytesProcessed(); - uniBuffer[processor.bytesProcessed()] = 0; - free(utf16Name); + utf8::codeunit_t* uniBuffer = gralloc<utf8::codeunit_t>(3 * utf16Length + 1); + utf8::iterator d = uniBuffer; + for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d) + *d = *s; + length = d - uniBuffer; + uniBuffer[length] = 0; return uniBuffer; } - else if (enc == gr_utf32) + case gr_utf16: + length = utf16Length; + return utf16Name; + case gr_utf32: { - uint32 * uniBuffer = gralloc<uint32>(utf16Length + 1); - IgnoreErrors ignore; - BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length)); - - ToUtf32Processor processor(uniBuffer, utf16Length); - processUTF(bufferLimit, &processor, &ignore); - length = processor.charsProcessed(); - uniBuffer[length] = 0; - free(utf16Name); - return uniBuffer; + utf32::codeunit_t * uniBuffer = gralloc<utf32::codeunit_t>(utf16Length + 1); + utf32::iterator d = uniBuffer; + for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d) + *d = *s; + length = d - uniBuffer; + uniBuffer[length] = 0; + return uniBuffer; + } } length = 0; return NULL; diff --git a/src/SegCacheStore.cpp b/src/SegCacheStore.cpp index 1ce0aa1..5e13c1f 100644 --- a/src/SegCacheStore.cpp +++ b/src/SegCacheStore.cpp @@ -40,26 +40,9 @@ SegCacheStore::SegCacheStore(const Face *face, unsigned int numSilf, size_t maxS assert(face); assert(face->getGlyphFaceCache()); m_maxCmapGid = face->getGlyphFaceCache()->numGlyphs(); - if (face->getCmapCache()) - { - m_spaceGid = face->getCmapCache()->lookup(0x20); - m_zwspGid = face->getCmapCache()->lookup(0x200B); - } - else - { - size_t cmapSize = 0; - const void * cmapTable = face->getTable(Tag::cmap, &cmapSize); - const void * bmpTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 1, cmapSize); - //const void * supplementaryTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, cmapSize); - if (bmpTable) - { - m_spaceGid = TtfUtil::Cmap31Lookup(bmpTable, 0x20); - m_zwspGid = TtfUtil::Cmap31Lookup(bmpTable, 0x200B); - // TODO find out if the Cmap(s) can be parsed to find a m_maxCmapGid < num_glyphs - // The Pseudo glyphs may mean that it isn't worth the effort - } - } + m_spaceGid = face->cmap()[0x20]; + m_zwspGid = face->cmap()[0x200B]; } #endif diff --git a/src/Segment.cpp b/src/Segment.cpp index 1fb97e0..b860643 100644 --- a/src/Segment.cpp +++ b/src/Segment.cpp @@ -24,7 +24,7 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public License, as published by the Free Software Foundation, either version 2 of the License or (at your option) any later version. */ -#include "processUTF.h" +#include "UtfCodec.h" #include <string.h> #include <stdlib.h> @@ -425,105 +425,33 @@ void Segment::logSegment() const #endif - - -class SlotBuilder +template <typename utf_iter> +inline void process_utf_data(Segment & seg, const Face & face, const int fid, utf_iter c, size_t n_chars) { -public: - SlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2) - : m_face(face2), - m_pDest(pDest2), - m_ctable(NULL), - m_stable(NULL), - m_fid(pDest2->addFeatures(*pFeats)), - m_nCharsProcessed(0) - { - size_t cmapSize = 0; - const void * table = face2->getTable(Tag::cmap, &cmapSize); - if (!table) return; - m_ctable = TtfUtil::FindCmapSubtable(table, 3, 1, cmapSize); - if (!m_ctable || !TtfUtil::CheckCmap31Subtable(m_ctable)) - { - m_ctable = NULL; - return; - } - m_stable = TtfUtil::FindCmapSubtable(table, 3, 10, cmapSize); - if (m_stable && !TtfUtil::CheckCmap310Subtable(m_stable)) m_stable = NULL; - } - - bool processChar(uint32 cid/*unicode character*/, size_t coffset) //return value indicates if should stop processing - { - if (!m_ctable) return false; - uint16 gid = cid > 0xFFFF ? (m_stable ? TtfUtil::Cmap310Lookup(m_stable, cid) : 0) : (m_ctable ? TtfUtil::Cmap31Lookup(m_ctable, cid) : 0); - if (!gid) - gid = m_face->findPseudo(cid); - m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset); - ++m_nCharsProcessed; - return true; - } - - size_t charsProcessed() const { return m_nCharsProcessed; } - -private: - const Face *m_face; - Segment *m_pDest; - const void * m_ctable; - const void * m_stable; - const unsigned int m_fid; - size_t m_nCharsProcessed ; -}; - -class CachedSlotBuilder -{ -public: - CachedSlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2) - : m_face(face2), - m_cmap(face2->getCmapCache()), - m_pDest(pDest2), - m_breakAttr(pDest2->silf()->aBreak()), - m_fid(pDest2->addFeatures(*pFeats)), - m_nCharsProcessed(0) - { - } - - bool processChar(uint32 cid/*unicode character*/, size_t coffset) //return value indicates if should stop processing - { - if (!m_cmap) return false; - uint16 gid = m_cmap->lookup(cid); - if (!gid) - gid = m_face->findPseudo(cid); - //int16 bw = m_face->glyphAttr(gid, m_breakAttr); - m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset); - ++m_nCharsProcessed; - return true; - } - - size_t charsProcessed() const { return m_nCharsProcessed; } - -private: - const Face *m_face; - const CmapCache *m_cmap; - Segment *m_pDest; - uint8 m_breakAttr; - const unsigned int m_fid; - size_t m_nCharsProcessed ; -}; + const Cmap & cmap = face.cmap(); + int slotid = 0; + + const typename utf_iter::codeunit_type * const base = c; + for (; n_chars; --n_chars, ++c, ++slotid) + { + const uint32 usv = *c; + uint16 gid = cmap[usv]; + if (!gid) gid = face.findPseudo(usv); + seg.appendSlot(slotid, usv, gid, fid, c - base); + } +} void Segment::read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars) { - assert(pFeats); - CharacterCountLimit limit(enc, pStart, nChars); - IgnoreErrors ignoreErrors; - if (face->getCmapCache()) - { - CachedSlotBuilder slotBuilder(face, pFeats, this); - processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors); - } - else - { - SlotBuilder slotBuilder(face, pFeats, this); - processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors); - } + assert(face); + assert(pFeats); + + switch (enc) + { + case gr_utf8: process_utf_data(*this, *face, addFeatures(*pFeats), utf8::const_iterator(pStart), nChars); break; + case gr_utf16: process_utf_data(*this, *face, addFeatures(*pFeats), utf16::const_iterator(pStart), nChars); break; + case gr_utf32: process_utf_data(*this, *face, addFeatures(*pFeats), utf32::const_iterator(pStart), nChars); break; + } } void Segment::prepare_pos(const Font * /*font*/) diff --git a/src/CmapCache.h b/src/UtfCodec.cpp similarity index 68% copy from src/CmapCache.h copy to src/UtfCodec.cpp index dc1603b..2064075 100644 --- a/src/CmapCache.h +++ b/src/UtfCodec.cpp @@ -15,8 +15,8 @@ You should also have received a copy of the GNU Lesser General Public License along with this library in the file named "LICENSE". - If not, write to the Free Software Foundation, 51 Franklin Street, - Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the internet at http://www.fsf.org/licenses/lgpl.html. Alternatively, the contents of this file may be used under the terms of the @@ -24,29 +24,22 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public License, as published by the Free Software Foundation, either version 2 of the License or (at your option) any later version. */ -#pragma once - -#include <graphite2/Types.h> +#include "UtfCodec.h" +//using namespace graphite2; namespace graphite2 { -class CmapCache +} + +using namespace graphite2; + +const int8 _utf_codec<8>::sz_lut[16] = { -public: - CmapCache(const void * cmapTable, size_t length); - ~CmapCache(); - uint16 lookup(unsigned int unicode) const { - if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF)) - return 0; - unsigned int block = (0xFFFFFF & unicode) >> 8; - if (m_blocks && m_blocks[block]) - return m_blocks[block][unicode & 0xFF]; - return 0; - }; - CLASS_NEW_DELETE -private: - bool m_isBmpOnly; - uint16 ** m_blocks; + 1,1,1,1,1,1,1,1, // 1 byte + 0,0,0,0, // trailing byte + 2,2, // 2 bytes + 3, // 3 bytes + 4 // 4 bytes }; -} // namespace graphite2 +const byte _utf_codec<8>::mask_lut[5] = {0x7f, 0xff, 0x3f, 0x1f, 0x0f}; diff --git a/src/UtfCodec.h b/src/UtfCodec.h new file mode 100644 index 0000000..5d5192c --- /dev/null +++ b/src/UtfCodec.h @@ -0,0 +1,208 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <iterator> +#include "Main.h" +#include "graphite2/Segment.h" + +namespace graphite2 { + +typedef uint32 uchar_t; + +template <int N> +struct _utf_codec +{ + typedef uchar_t codeunit_t; + + static void put(codeunit_t * cp, const uchar_t , int8 & len) throw(); + static uchar_t get(const codeunit_t * cp, int8 & len) throw(); +}; + + +template <> +struct _utf_codec<32> +{ +private: + static const uchar_t limit = 0x110000; +public: + typedef uint32 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + *cp = usv; l = 1; + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + if (cp[0] < limit) { l = 1; return cp[0]; } + else { l = -1; return 0xFFFD; } + } +}; + + +template <> +struct _utf_codec<16> +{ +private: + static const int32 lead_offset = 0xD800 - (0x10000 >> 10); + static const int32 surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00; +public: + typedef uint16 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + if (usv < 0x10000) { l = 1; cp[0] = codeunit_t(usv); } + else + { + cp[0] = codeunit_t(lead_offset + (usv >> 10)); + cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF)); + l = 2; + } + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + const uint32 uh = cp[0]; + l = 1; + + if (0xD800 > uh || uh > 0xDFFF) { return uh; } + const uint32 ul = cp[1]; + if (uh > 0xDBFF || 0xDC00 > ul || ul > 0xDFFF) { l = -1; return 0xFFFD; } + ++l; + return (uh<<10) + ul + surrogate_offset; + } +}; + + +template <> +struct _utf_codec<8> +{ +private: + static const int8 sz_lut[16]; + static const byte mask_lut[5]; + + +public: + typedef uint8 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + if (usv < 0x80) {l = 1; cp[0] = usv; return; } + if (usv < 0x0800) {l = 2; cp[0] = 0xC0 + (usv >> 6); cp[1] = 0x80 + (usv & 0x3F); return; } + if (usv < 0x10000) {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F); cp[2] = 0x80 + (usv & 0x3F); return; } + else {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; } + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + const int8 seq_sz = sz_lut[*cp >> 4]; + uchar_t u = *cp & mask_lut[seq_sz]; + l = 1; + bool toolong = false; + + switch(seq_sz) { + case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong = (u < 0x10); + case 3: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20); + case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80); + case 1: break; + case 0: l = -1; return 0xFFFD; + } + + if (l != seq_sz || toolong) + { + l = -l; + return 0xFFFD; + } + return u; + } +}; + + +template <typename C> +class _utf_iterator +{ + typedef _utf_codec<sizeof(C)*8> codec; + + C * cp; + mutable int8 sl; + +public: + typedef C codeunit_type; + typedef uchar_t value_type; + typedef uchar_t * pointer; + + class reference + { + const _utf_iterator & _i; + + reference(const _utf_iterator & i): _i(i) {} + public: + operator value_type () const throw () { return codec::get(_i.cp, _i.sl); } + reference & operator = (const value_type usv) throw() { codec::put(_i.cp, usv, _i.sl); return *this; } + + friend class _utf_iterator; + }; + + + _utf_iterator(const void * us=0) : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { } + + _utf_iterator & operator ++ () { cp += abs(sl); return *this; } + _utf_iterator operator ++ (int) { _utf_iterator tmp(*this); operator++(); return tmp; } + + bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; } + bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); } + + reference operator * () const throw() { return *this; } + pointer operator ->() const throw() { return &operator *(); } + + operator codeunit_type * () const throw() { return cp; } + + bool error() const throw() { return sl < 1; } +}; + +template <typename C> +struct utf +{ + typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t; + + typedef _utf_iterator<C> iterator; + typedef _utf_iterator<const C> const_iterator; +}; + + +typedef utf<uint32> utf32; +typedef utf<uint16> utf16; +typedef utf<uint8> utf8; + +} // namespace graphite2 diff --git a/src/files.mk b/src/files.mk index 9504e60..355e374 100644 --- a/src/files.mk +++ b/src/files.mk @@ -63,7 +63,8 @@ $(_NS)_SOURCES = \ $($(_NS)_BASE)/src/Silf.cpp \ $($(_NS)_BASE)/src/Slot.cpp \ $($(_NS)_BASE)/src/Sparse.cpp \ - $($(_NS)_BASE)/src/TtfUtil.cpp + $($(_NS)_BASE)/src/TtfUtil.cpp \ + $($(_NS)_BASE)/src/UtfCodec.cpp $(_NS)_PRIVATE_HEADERS = \ $($(_NS)_BASE)/src/CachedFace.h \ @@ -96,6 +97,7 @@ $(_NS)_PRIVATE_HEADERS = \ $($(_NS)_BASE)/src/Sparse.h \ $($(_NS)_BASE)/src/TtfTypes.h \ $($(_NS)_BASE)/src/TtfUtil.h \ + $($(_NS)_BASE)/src/UtfCodec.h \ $($(_NS)_BASE)/src/XmlTraceLog.h \ $($(_NS)_BASE)/src/XmlTraceLogTags.h diff --git a/src/gr_segment.cpp b/src/gr_segment.cpp index f6cf52d..0c1d6d4 100644 --- a/src/gr_segment.cpp +++ b/src/gr_segment.cpp @@ -25,46 +25,13 @@ License, as published by the Free Software Foundation, either version 2 of the License or (at your option) any later version. */ #include "graphite2/Segment.h" -#include "processUTF.h" +#include "UtfCodec.h" #include "Segment.h" using namespace graphite2; namespace { - template <class LIMIT, class CHARPROCESSOR> - size_t doCountUnicodeCharacters(const LIMIT& limit, CHARPROCESSOR* pProcessor, const void** pError) - { - BreakOnError breakOnError; - - processUTF(limit/*when to stop processing*/, pProcessor, &breakOnError); - if (pError) { - *pError = breakOnError.m_pErrorPos; - } - return pProcessor->charsProcessed(); - } - - class CharCounterToNul - { - public: - CharCounterToNul() - : m_nCharsProcessed(0) - { - } - - bool processChar(uint32 cid/*unicode character*/, size_t /*offset*/) //return value indicates if should stop processing - { - if (cid==0) - return false; - ++m_nCharsProcessed; - return true; - } - - size_t charsProcessed() const { return m_nCharsProcessed; } - - private: - size_t m_nCharsProcessed ; - }; gr_segment* makeAndInitialize(const Font *font, const Face *face, uint32 script, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars, int dir) { @@ -92,23 +59,43 @@ namespace } -extern "C" { +template <typename utf_iter> +inline size_t count_unicode_chars(utf_iter first, const utf_iter last, const void **error) +{ + size_t n_chars = 0; + uint32 usv = 0; + + if (last) + { + for (;first != last; ++first, ++n_chars) + if ((usv = *first) == 0 || first.error()) break; + } + else + { + while ((usv = *first) != 0 && !first.error()) + { + ++first; + ++n_chars; + } + } + + if (error) *error = first.error() ? first : 0; + return n_chars; +} +extern "C" { size_t gr_count_unicode_characters(gr_encform enc, const void* buffer_begin, const void* buffer_end/*don't go on or past end, If NULL then ignored*/, const void** pError) //Also stops on nul. Any nul is not in the count { - if (buffer_end) - { - BufferLimit limit(enc, buffer_begin, buffer_end); - CharCounterToNul counter; - return doCountUnicodeCharacters(limit, &counter, pError); - } - else - { - NoLimit limit(enc, buffer_begin); - CharCounterToNul counter; - return doCountUnicodeCharacters(limit, &counter, pError); - } + assert(buffer_begin); + + switch (enc) + { + case gr_utf8: return count_unicode_chars<utf8::const_iterator>(buffer_begin, buffer_end, pError); break; + case gr_utf16: return count_unicode_chars<utf16::const_iterator>(buffer_begin, buffer_end, pError); break; + case gr_utf32: return count_unicode_chars<utf32::const_iterator>(buffer_begin, buffer_end, pError); break; + default: return 0; + } } diff --git a/src/processUTF.h b/src/processUTF.h deleted file mode 100644 index 18c5e6a..0000000 --- a/src/processUTF.h +++ /dev/null @@ -1,494 +0,0 @@ -/* GRAPHITE2 LICENSING - - Copyright 2010, SIL International - All rights reserved. - - This library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; either version 2.1 of License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should also have received a copy of the GNU Lesser General Public - License along with this library in the file named "LICENSE". - If not, write to the Free Software Foundation, 51 Franklin Street, - Suite 500, Boston, MA 02110-1335, USA or visit their web page on the - internet at http://www.fsf.org/licenses/lgpl.html. - -Alternatively, the contents of this file may be used under the terms of the -Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public -License, as published by the Free Software Foundation, either version 2 -of the License or (at your option) any later version. -*/ -#pragma once - -#include "Main.h" -#include "graphite2/Segment.h" - -namespace graphite2 { - -class NoLimit //relies on the processor.processChar() failing, such as because of a terminating nul character -{ -public: - NoLimit(gr_encform enc2, const void* pStart2) : m_enc(enc2), m_pStart(pStart2) {} - gr_encform enc() const { return m_enc; } - const void* pStart() const { return m_pStart; } - - bool inBuffer(const void* /*pCharLastSurrogatePart*/, uint32 /*val*/) const { return true; } - bool needMoreChars(const void* /*pCharStart*/, size_t /*nProcessed*/) const { return true; } - -private: - gr_encform m_enc; - const void* m_pStart; -}; - - -class CharacterCountLimit -{ -public: - CharacterCountLimit(gr_encform enc2, const void* pStart2, size_t numchars) : m_numchars(numchars), m_enc(enc2), m_pStart(pStart2) {} - gr_encform enc() const { return m_enc; } - const void* pStart() const { return m_pStart; } - - bool inBuffer (const void* /*pCharLastSurrogatePart*/, uint32 val) const { return (val != 0); } - bool needMoreChars (const void* /*pCharStart*/, size_t nProcessed) const { return nProcessed<m_numchars; } - -private: - size_t m_numchars; - gr_encform m_enc; - const void* m_pStart; -}; - - -class BufferLimit -{ -public: - BufferLimit(gr_encform enc2, const void* pStart2, const void* pEnd/*as in stl i.e. don't use end*/) : m_enc(enc2), m_pStart(pStart2) { - size_t nFullTokens = (static_cast<const char*>(pEnd)-static_cast<const char *>(m_pStart))/int(m_enc); //rounds off partial tokens - m_pEnd = static_cast<const char *>(m_pStart) + (nFullTokens*int(m_enc)); - } - gr_encform enc() const { return m_enc; } - const void* pStart() const { return m_pStart; } - - bool inBuffer (const void* pCharLastSurrogatePart, uint32 /*val*/) const { return pCharLastSurrogatePart<m_pEnd; } //also called on charstart by needMoreChars() - - bool needMoreChars (const void* pCharStart, size_t /*nProcessed*/) const { return inBuffer(pCharStart, 1); } - -private: - const void* m_pEnd; - gr_encform m_enc; - const void* m_pStart; -}; - - -class IgnoreErrors -{ -public: - //for all of the ignore* methods is the parameter is false, the return result must be true - static bool ignoreUnicodeOutOfRangeErrors(bool /*isBad*/) { return true; } - static bool ignoreBadSurrogatesErrors(bool /*isBad*/) { return true; } - - static bool handleError(const void* /*pPositionOfError*/) { return true;} -}; - - -class BreakOnError -{ -public: - BreakOnError() : m_pErrorPos(NULL) {} - - //for all of the ignore* methods is the parameter is false, the return result must be true - static bool ignoreUnicodeOutOfRangeErrors(bool isBad) { return !isBad; } - static bool ignoreBadSurrogatesErrors(bool isBad) { return !isBad; } - - bool handleError(const void* pPositionOfError) { m_pErrorPos=pPositionOfError; return false;} - -public: - const void* m_pErrorPos; -}; - - - - - -/* - const int utf8_extrabytes_lut[16] = {0,0,0,0,0,0,0,0, // 1 byte - 3,3,3,3, // errors since trailing byte, catch later - 1,1, // 2 bytes - 2, // 3 bytes - 3}; // 4 bytes - quicker to implement directly: -*/ - -inline unsigned int utf8_extrabytes(const unsigned int topNibble) { return (0xE5FF0000>>(2*topNibble))&0x3; } - -inline unsigned int utf8_mask(const unsigned int seq_extra) { return ((0xFEC0>>(4*seq_extra))&0xF)<<4; } - -class Utf8Consumer -{ -public: - Utf8Consumer(const uint8* pCharStart2) : m_pCharStart(pCharStart2) {} - - const uint8* pCharStart() const { return m_pCharStart; } - -private: - template <class ERRORHANDLER> - bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing - *pRes = 0xFFFD; - if (!pErrHandler->handleError(m_pCharStart)) { - return false; - } - ++m_pCharStart; - return true; - } - -public: - template <class LIMIT, class ERRORHANDLER> - inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) { //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit - const unsigned int seq_extra = utf8_extrabytes(*m_pCharStart >> 4); //length of sequence including *m_pCharStart is 1+seq_extra - if (!limit.inBuffer(m_pCharStart+(seq_extra), *m_pCharStart)) { - return false; - } - - *pRes = *m_pCharStart ^ utf8_mask(seq_extra); - - if (seq_extra) { - switch(seq_extra) { //hopefully the optimizer will implement this as a jump table. If not the above if should cover the majority case. - case 3: { - if (pErrHandler->ignoreUnicodeOutOfRangeErrors(*m_pCharStart>=0xF8)) { //the good case - ++m_pCharStart; - if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) { - return respondToError(pRes, pErrHandler); - } - - *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F; //drop through - } - else { - return respondToError(pRes, pErrHandler); - } - } - case 2: { - ++m_pCharStart; - if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) { - return respondToError(pRes, pErrHandler); - } - } - *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F; //drop through - case 1: { - ++m_pCharStart; - if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) { - return respondToError(pRes, pErrHandler); - } - } - *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F; - } - } - ++m_pCharStart; - return true; - } - -private: - const uint8 *m_pCharStart; -}; - - - -class Utf16Consumer -{ -public: - Utf16Consumer(const uint16* pCharStart2) : m_pCharStart(pCharStart2) {} - - const uint16* pCharStart() const { return m_pCharStart; } - -private: - template <class ERRORHANDLER> - bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing - *pRes = 0xFFFD; - if (!pErrHandler->handleError(m_pCharStart)) { - return false; - } - ++m_pCharStart; - return true; - } - -public: - template <class LIMIT, class ERRORHANDLER> - inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit - { - *pRes = *m_pCharStart; - if (0xD800 > *pRes || !pErrHandler->ignoreUnicodeOutOfRangeErrors(*pRes >= 0xE000)) { - ++m_pCharStart; - return true; - } - - if (!pErrHandler->ignoreBadSurrogatesErrors(*pRes >= 0xDC00)) { //second surrogate is incorrectly coming first - return respondToError(pRes, pErrHandler); - } - - ++m_pCharStart; - if (!limit.inBuffer(m_pCharStart, *pRes)) { - return false; - } - - uint32 ul = *(m_pCharStart); - if (!pErrHandler->ignoreBadSurrogatesErrors(0xDC00 > ul || ul > 0xDFFF)) { - return respondToError(pRes, pErrHandler); - } - ++m_pCharStart; - *pRes = ((*pRes - 0xD800)<<10) + ul - 0xDC00; - return true; - } - -private: - const uint16 *m_pCharStart; -}; - - -class Utf32Consumer -{ -public: - Utf32Consumer(const uint32* pCharStart2) : m_pCharStart(pCharStart2) {} - - const uint32* pCharStart() const { return m_pCharStart; } - -private: - template <class ERRORHANDLER> - bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing - *pRes = 0xFFFD; - if (!pErrHandler->handleError(m_pCharStart)) { - return false; - } - ++m_pCharStart; - return true; - } - -public: - template <class LIMIT, class ERRORHANDLER> - inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit - { - *pRes = *m_pCharStart; - if (pErrHandler->ignoreUnicodeOutOfRangeErrors(!(*pRes<0xD800 || (*pRes>=0xE000 && *pRes<0x110000)))) { - if (!limit.inBuffer(++m_pCharStart, *pRes)) - return false; - else - return true; - } - - return respondToError(pRes, pErrHandler); - } - -private: - const uint32 *m_pCharStart; -}; - - - - -/* The following template function assumes that LIMIT and CHARPROCESSOR have the following methods and semantics: - -class LIMIT -{ -public: - SegmentHandle::encform enc() const; //which of the below overloads of inBuffer() and needMoreChars() are called - const void* pStart() const; //start of first character to process - - bool inBuffer(const uint8* pCharLastSurrogatePart) const; //whether or not the input is considered to be in the range of the buffer. - bool inBuffer(const uint16* pCharLastSurrogatePart) const; //whether or not the input is considered to be in the range of the buffer. - - bool needMoreChars(const uint8* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed. - bool needMoreChars(const uint16* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed. - bool needMoreChars(const uint32* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed. -}; - -class ERRORHANDLER -{ -public: - //for all of the ignore* methods is the parameter is false, the return result must be true - bool ignoreUnicodeOutOfRangeErrors(bool isBad) const; - bool ignoreBadSurrogatesErrors(bool isBad) const; - - bool handleError(const void* pPositionOfError); //returns true iff error handled and should continue -}; - -class CHARPROCESSOR -{ -public: - bool processChar(uint32 cid); //return value indicates if should stop processing - size_t charsProcessed() const; //number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars -}; - -Useful reusable examples of LIMIT are: -NoLimit //relies on the CHARPROCESSOR.processChar() failing, such as because of a terminating nul character -CharacterCountLimit //doesn't care about where the input buffer may end, but limits the number of unicode characters processed. -BufferLimit //processes how ever many characters there are until the buffer end. characters straggling the end are not processed. -BufferAndCharacterCountLimit //processes a maximum number of characters there are until the buffer end. characters straggling the end are not processed. - -Useful examples of ERRORHANDLER are IgnoreErrors, BreakOnError. -*/ - -template <class LIMIT, class CHARPROCESSOR, class ERRORHANDLER> -void processUTF(const LIMIT& limit/*when to stop processing*/, CHARPROCESSOR* pProcessor, ERRORHANDLER* pErrHandler) -{ - uint32 cid; - switch (limit.enc()) { - case gr_utf8 : { - const uint8 *pInit = static_cast<const uint8 *>(limit.pStart()); - Utf8Consumer consumer(pInit); - for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) { - const uint8 *pCur = consumer.pCharStart(); - if (!consumer.consumeChar(limit, &cid, pErrHandler)) - break; - if (!pProcessor->processChar(cid, pCur - pInit)) - break; - } - break; } - case gr_utf16: { - const uint16* pInit = static_cast<const uint16 *>(limit.pStart()); - Utf16Consumer consumer(pInit); - for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) { - const uint16 *pCur = consumer.pCharStart(); - if (!consumer.consumeChar(limit, &cid, pErrHandler)) - break; - if (!pProcessor->processChar(cid, pCur - pInit)) - break; - } - break; - } - case gr_utf32 : default: { - const uint32 *pInit = static_cast<const uint32 *>(limit.pStart()); - Utf32Consumer consumer(pInit); - for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) { - const uint32 *pCur = consumer.pCharStart(); - if (!consumer.consumeChar(limit, &cid, pErrHandler)) - break; - if (!pProcessor->processChar(cid, pCur - pInit)) - break; - } - break; - } - } -} - - class ToUtf8Processor - { - public: - // buffer length should be three times the utf16 length or - // four times the utf32 length to cover the worst case - ToUtf8Processor(uint8 * buffer, size_t maxLength) : - m_count(0), m_byteLength(0), m_maxLength(maxLength), m_buffer(buffer) - {} - bool processChar(uint32 cid, size_t /*offset*/) - { - // taken from Unicode Book ch3.9 - if (cid <= 0x7F) - m_buffer[m_byteLength++] = cid; - else if (cid <= 0x07FF) - { - if (m_byteLength + 2 >= m_maxLength) - return false; - m_buffer[m_byteLength++] = 0xC0 + (cid >> 6); - m_buffer[m_byteLength++] = 0x80 + (cid & 0x3F); - } - else if (cid <= 0xFFFF) - { - if (m_byteLength + 3 >= m_maxLength) - return false; - m_buffer[m_byteLength++] = 0xE0 + (cid >> 12); - m_buffer[m_byteLength++] = 0x80 + ((cid & 0x0FC0) >> 6); - m_buffer[m_byteLength++] = 0x80 + (cid & 0x003F); - } - else if (cid <= 0x10FFFF) - { - if (m_byteLength + 4 >= m_maxLength) - return false; - m_buffer[m_byteLength++] = 0xF0 + (cid >> 18); - m_buffer[m_byteLength++] = 0x80 + ((cid & 0x3F000) >> 12); - m_buffer[m_byteLength++] = 0x80 + ((cid & 0x00FC0) >> 6); - m_buffer[m_byteLength++] = 0x80 + (cid & 0x0003F); - } - else - { - // ignore - } - m_count++; - if (m_byteLength >= m_maxLength) - return false; - return true; - } - size_t charsProcessed() const { return m_count; } - size_t bytesProcessed() const { return m_byteLength; } - private: - size_t m_count; - size_t m_byteLength; - size_t m_maxLength; - uint8 * m_buffer; - }; - - class ToUtf16Processor - { - public: - // buffer length should be twice the utf32 length - // to cover the worst case - ToUtf16Processor(uint16 * buffer, size_t maxLength) : - m_count(0), m_uint16Length(0), m_maxLength(maxLength), m_buffer(buffer) - {} - bool processChar(uint32 cid, size_t /*offset*/) - { - // taken from Unicode Book ch3.9 - if (cid <= 0xD800) - m_buffer[m_uint16Length++] = cid; - else if (cid < 0xE000) - { - // skip for now - } - else if (cid >= 0xE000 && cid <= 0xFFFF) - m_buffer[m_uint16Length++] = cid; - else if (cid <= 0x10FFFF) - { - if (m_uint16Length + 2 >= m_maxLength) - return false; - m_buffer[m_uint16Length++] = 0xD800 + ((cid & 0xFC00) >> 10) + ((cid >> 16) - 1); - m_buffer[m_uint16Length++] = 0xDC00 + ((cid & 0x03FF) >> 12); - } - else - { - // ignore - } - m_count++; - if (m_uint16Length == m_maxLength) - return false; - return true; - } - size_t charsProcessed() const { return m_count; } - size_t uint16Processed() const { return m_uint16Length; } - private: - size_t m_count; - size_t m_uint16Length; - size_t m_maxLength; - uint16 * m_buffer; - }; - - class ToUtf32Processor - { - public: - ToUtf32Processor(uint32 * buffer, size_t maxLength) : - m_count(0), m_maxLength(maxLength), m_buffer(buffer) {} - bool processChar(uint32 cid, size_t /*offset*/) - { - m_buffer[m_count++] = cid; - if (m_count == m_maxLength) - return false; - return true; - } - size_t charsProcessed() const { return m_count; } - private: - size_t m_count; - size_t m_maxLength; - uint32 * m_buffer; - }; - -} // namespace graphite2 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 541fb2e..4e9f7a6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(nametabletest) add_subdirectory(examples) add_subdirectory(grlist) add_subdirectory(endian) +add_subdirectory(utftest) enable_testing() diff --git a/tests/segcache/segcachetest.cpp b/tests/segcache/segcachetest.cpp index 25cea8f..3a4021c 100644 --- a/tests/segcache/segcachetest.cpp +++ b/tests/segcache/segcachetest.cpp @@ -27,7 +27,7 @@ #include "Segment.h" #include "SegCache.h" #include "SegCacheStore.h" -#include "processUTF.h" +#include "UtfCodec.h" #include "TtfTypes.h" #include "TtfUtil.h" @@ -35,24 +35,18 @@ using namespace graphite2; inline gr_face * api_cast(CachedFace *p) { return static_cast<gr_face*>(static_cast<Face*>(p)); } -class CmapProcessor +template <typename utf_itr> +void resolve_unicode_to_glyphs(const Face & face, utf_itr first, size_t n_chars, uint16 * glyphs) { -public: - CmapProcessor(Face * face, uint16 * buffer) : - m_cmapTable(TtfUtil::FindCmapSubtable(face->getTable("cmap", NULL), 3, 1)), - m_buffer(buffer), m_pos(0) {}; - bool processChar(uint32 cid, size_t /*offset*/) //return value indicates if should stop processing - { - assert(cid < 0xFFFF); // only lower plane supported for this test - m_buffer[m_pos++] = TtfUtil::Cmap31Lookup(m_cmapTable, cid); - return true; - } - size_t charsProcessed() const { return m_pos; } //number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars -private: - const void * m_cmapTable; - uint16 * m_buffer; - size_t m_pos; -}; + const void * cmap = TtfUtil::FindCmapSubtable(face.getTable("cmap", NULL), 3, 1); + + for (; n_chars; --n_chars, ++first) + { + const uint32 usv = *first; + assert(usv < 0xFFFF); // only lower plane supported for this test + *glyphs++ = TtfUtil::Cmap31Lookup(cmap, usv); + } +} bool checkEntries(CachedFace * face, const char * testString, uint16 * glyphString, size_t testLength) @@ -120,10 +114,7 @@ bool testSeg(CachedFace testString + strlen(testString), &badUtf8); *testGlyphString = gralloc<uint16>(*testLength + 1); - CharacterCountLimit limit(gr_utf8, testString, *testLength); - CmapProcessor cmapProcessor(face, *testGlyphString); - IgnoreErrors ignoreErrors; - processUTF(limit, &cmapProcessor, &ignoreErrors); + resolve_unicode_to_glyphs(*face, utf8::iterator(testString), *testLength, *testGlyphString); gr_segment * segA = gr_make_seg(sizedFont, api_cast(face), 0, NULL, gr_utf8, testString, *testLength, 0); @@ -149,11 +140,8 @@ int main(int argc, char ** argv) } FILE * log = fopen("grsegcache.xml", "w"); graphite_start_logging(log, GRLOG_SEGMENT); - CachedFace - *face = static_cast<CachedFace -*>(static_cast<Face -*>( - (gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default)))); + CachedFace *face = static_cast<CachedFace *>(static_cast<Face *>( + gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default))); if (!face) { fprintf(stderr, "Invalid font, failed to parse tables\n"); diff --git a/tests/utftest/CMakeLists.txt b/tests/utftest/CMakeLists.txt new file mode 100644 index 0000000..5048202 --- /dev/null +++ b/tests/utftest/CMakeLists.txt @@ -0,0 +1,15 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0 FATAL_ERROR) +project(utftest) +include(Graphite) +include_directories(../../src) + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + add_definitions(-D_SCL_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_WARNINGS -DUNICODE) +endif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + + +target_objects(OBJS graphite2) +add_executable(utftest utftest.cpp ${OBJS}) +add_dependencies(utftest graphite2) + +add_test(NAME utftest COMMAND $<TARGET_FILE:utftest>) diff --git a/tests/utftest/utftest.cpp b/tests/utftest/utftest.cpp new file mode 100644 index 0000000..bdcc9e7 --- /dev/null +++ b/tests/utftest/utftest.cpp @@ -0,0 +1,56 @@ +#include <graphite2/Segment.h> +#include <stdio.h> + +struct test +{ + int len, + error; + unsigned char str[12]; +}; +struct test tests[] = { + { 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0, 0} }, // U+7F, U+7FF, U+FFFF, U+10FFF + { 2, 3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, U+7FF, long(U+FFFF), U+10FFF + { 1, 1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, long(U+7FF), U+FFFF, U+10FFF + { 0, 0, {0xC1, 0xBF, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0xBF, 0xBF, 0xBF, 0} }, // long(U+7F), U+7FF, U+FFFF, U+10FFF + { 4, -1, {0x01, 0xC2, 0x80, 0xE0, 0xA0, 0x80, 0xF0, 0x90, 0x80, 0x80, 0, 0} }, // U+01, U+80, U+800, U+10000 + { 1, 1, {0x65, 0x9F, 0x65, 0x65, 0, 0, 0, 0, 0, 0, 0, 0} }, // U+65 bad(1) U+65 U+65 + { 2, 2, {0x65, 0x65, 0xC2, 0xC2, 0x65, 0x65, 0, 0, 0, 0, 0, 0} }, // U+65 U+65 bad(1) bad(1) U+65 U+65 + { 2, 2, {0x65, 0x75, 0xE3, 0x84, 0x75, 0x75, 0, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(2) U+75 U+75 + { 2, 2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0x75, 0x75, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(3) U+75 U+75 + { 2, 2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0xF5, 0x75, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(3) bad(1) U+75 +}; + +const int numtests = sizeof(tests)/sizeof(test); + +int main(int argc, char * argv[]) { + int i; + const void * error; + + for (i = 0; i < numtests; ++i) + { + int res = gr_count_unicode_characters(gr_utf8, tests[i].str, tests[i].str + sizeof(tests[i].str), &error); + if (tests[i].error >= 0) + { + if (!error) + { + fprintf(stderr, "%s: test %d failed: expected error condition did not occur\n", argv[0], i + 1); + return (i+1); + } + else if (ptrdiff_t(error) - ptrdiff_t(tests[i].str) != tests[i].error) + { + fprintf(stderr, "%s: test %d failed: error at codepoint %d expected at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str)), tests[i].len); + return (i+1); + } + } + else if (error) + { + fprintf(stderr, "%s: test %d failed: unexpected error occured at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str))); + return (i+1); + } + if (res != tests[i].len) + { + fprintf(stderr, "%s: test %d failed: character count failure %d != %d\n", argv[0], i + 1, res, tests[i].len); + return (i+1); + } + } +} diff --git a/tests/vm/CMakeLists.txt b/tests/vm/CMakeLists.txt index f2c2a3e..d5abe62 100644 --- a/tests/vm/CMakeLists.txt +++ b/tests/vm/CMakeLists.txt @@ -33,6 +33,7 @@ target_objects(OBJS graphite2 Slot.cpp Sparse.cpp TtfUtil.cpp + UtfCodec.cpp ${TRACESUPPORT}) add_library(vm-test-common STATIC basic_test.cpp ${OBJS}) add_dependencies(vm-test-common graphite2) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/graphite2.git

