filter/inc/filter/msfilter/util.hxx | 18 +++++ filter/source/msfilter/util.cxx | 127 ++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+)
New commits: commit 1a8d78656a5773d285a430f6f88602beda173a8e Author: Caolán McNamara <caol...@redhat.com> Date: Mon Aug 13 15:07:13 2012 +0100 The missing docs as to the ranges of the MS text categories especially the definition of Complex Scripts Change-Id: I693936f1dd150aefd4a632530439acf8edcbdd74 diff --git a/filter/inc/filter/msfilter/util.hxx b/filter/inc/filter/msfilter/util.hxx index d772a24..607566a 100644 --- a/filter/inc/filter/msfilter/util.hxx +++ b/filter/inc/filter/msfilter/util.hxx @@ -70,6 +70,24 @@ MSFILTER_DLLPUBLIC rtl::OString DateTimeToOString( const DateTime& rDateTime ); MSFILTER_DLLPUBLIC sal_Unicode bestFitOpenSymbolToMSFont(sal_Unicode cBullet, rtl_TextEncoding& r_ioChrSet, rtl::OUString& r_ioFontName, bool bDisableUnicodeSupport = false); + +/** Font slots that categorizeCodePoint can assign a code point to. */ +enum TextCategory +{ + latin, //Latin font slot + cs, //Complex Script font slot + ea, //East Asian font slot, also the result for every range not explicitly listed + sym //Symbol font slot +}; + +/** Categorize codepoints according to how MS seems to do it. + + It's been bugging me for ages as to what codepoint MS considers in + what category. Tom Jebo has a post suggesting the criteria used here + and indicating it has been submitted to the standards working group + as a proposed resolution. + + @param codePoint the code point to classify + @param rBcp47LanguageTag language tag of the text; only consulted for the + quote characters U+2018..U+201E, which are categorized as ea for ii-CN, + ja-JP, ko-KR, zh-CN, zh-HK, zh-MO, zh-SG and zh-TW + @return the font slot for codePoint; ea for every range not explicitly listed +*/ +MSFILTER_DLLPUBLIC TextCategory categorizeCodePoint(sal_uInt32 codePoint, const rtl::OUString &rBcp47LanguageTag); + } } diff --git a/filter/source/msfilter/util.cxx b/filter/source/msfilter/util.cxx index ddb9f14..db8aa6c 100644 --- a/filter/source/msfilter/util.cxx +++ b/filter/source/msfilter/util.cxx @@ -179,6 +179,133 @@ sal_Unicode bestFitOpenSymbolToMSFont(sal_Unicode cChar, return cChar; } +/* + http://social.msdn.microsoft.com/Forums/hu-HU/os_openXML-ecma/thread/1bf1f185-ee49-4314-94e7-f4e1563b5c00 + + The following information is being submitted to the standards working group as + a proposed resolution to a defect report and is not yet part of ISO 29500-1. + ... 
+    For each Unicode character in DrawingML text, the font face can be any of four
+    font “slots”: latin (§21.1.2.3.7), cs (§21.1.2.3.1), ea (§21.1.2.3.3), or sym
+    (§21.1.2.3.10), as specified in the following table. For all ranges not
+    explicitly called out below, the ea font shall be used.
+
+    U+0000–U+007F Use latin font
+    U+0080–U+00A6 Use latin font
+    U+00A9–U+00AF Use latin font
+    U+00B2–U+00B3 Use latin font
+    U+00B5–U+00D6 Use latin font
+    U+00D8–U+00F6 Use latin font
+    U+00F8–U+058F Use latin font
+    U+0590–U+074F Use cs font
+    U+0780–U+07BF Use cs font
+    U+0900–U+109F Use cs font
+    U+10A0–U+10FF Use latin font
+    U+1200–U+137F Use latin font
+    U+13A0–U+177F Use latin font
+    U+1D00–U+1D7F Use latin font
+    U+1E00–U+1FFF Use latin font
+    U+1780–U+18AF Use cs font
+    U+2000–U+200B Use latin font
+    U+200C–U+200F Use cs font
+    U+2010–U+2029 Use latin font Except, for the quote characters in the range
+    2018 – 201E, use ea font if the text has one of the following language
+    identifiers: ii-CN, ja-JP, ko-KR, zh-CN, zh-HK, zh-MO, zh-SG, zh-TW.
+    U+202A–U+202F Use cs font
+    U+2030–U+2046 Use latin font
+    U+204A–U+245F Use latin font
+    U+2670–U+2671 Use cs font
+    U+27C0–U+2BFF Use latin font
+    U+3099–U+309A Use ea font
+    U+D835 Use latin font
+    U+F000–U+F0FF Symbol, use sym font
+    U+FB00–U+FB17 Use latin font
+    U+FB1D–U+FB4F Use cs font
+    U+FE50–U+FE6F Use latin font
+    Otherwise Use ea font
+*/
+/** Return the font slot (latin, cs, ea or sym) for a code point.
+
+    The ranges are tested in the order of the table quoted in the comment
+    that precedes this function; any code point that no range claims is ea.
+
+    @param codePoint the code point to classify
+    @param rBcp47LanguageTag language tag of the text; only consulted for the
+    quote characters U+2018..U+201E
+    @return the font slot for codePoint
+*/
+TextCategory categorizeCodePoint(sal_uInt32 codePoint, const rtl::OUString &rBcp47LanguageTag)
+{
+    // Default for everything that is not explicitly listed below.
+    TextCategory eRet = ea;
+    if (codePoint <= 0x007F)
+        eRet = latin;
+    else if (0x0080 <= codePoint && codePoint <= 0x00A6)
+        eRet = latin;
+    else if (0x00A9 <= codePoint && codePoint <= 0x00AF)
+        eRet = latin;
+    else if (0x00B2 <= codePoint && codePoint <= 0x00B3)
+        eRet = latin;
+    else if (0x00B5 <= codePoint && codePoint <= 0x00D6)
+        eRet = latin;
+    else if (0x00D8 <= codePoint && codePoint <= 0x00F6)
+        eRet = latin;
+    else if (0x00F8 <= codePoint && codePoint <= 0x058F)
+        eRet = latin;
+    else if (0x0590 <= codePoint && codePoint <= 0x074F)
+        eRet = cs;
+    else if (0x0780 <= codePoint && codePoint <= 0x07BF)
+        eRet = cs;
+    else if (0x0900 <= codePoint && codePoint <= 0x109F)
+        eRet = cs;
+    else if (0x10A0 <= codePoint && codePoint <= 0x10FF)
+        eRet = latin;
+    else if (0x1200 <= codePoint && codePoint <= 0x137F)
+        eRet = latin;
+    else if (0x13A0 <= codePoint && codePoint <= 0x177F)
+        eRet = latin;
+    else if (0x1D00 <= codePoint && codePoint <= 0x1D7F)
+        eRet = latin;
+    else if (0x1E00 <= codePoint && codePoint <= 0x1FFF)
+        eRet = latin;
+    else if (0x1780 <= codePoint && codePoint <= 0x18AF)
+        eRet = cs;
+    else if (0x2000 <= codePoint && codePoint <= 0x200B)
+        eRet = latin;
+    else if (0x200C <= codePoint && codePoint <= 0x200F)
+        eRet = cs;
+    else if (0x2010 <= codePoint && codePoint <= 0x2029)
+    {
+        eRet = latin;
+        // The quote characters are the one language-dependent case: for the
+        // listed language tags they are East Asian instead of latin.
+        if (0x2018 <= codePoint && codePoint <= 0x201E)
+        {
+            if (rBcp47LanguageTag == "ii-CN" ||
+                rBcp47LanguageTag == "ja-JP" ||
+                rBcp47LanguageTag == "ko-KR" ||
+                rBcp47LanguageTag == "zh-CN" ||
+                rBcp47LanguageTag == "zh-HK" ||
+                rBcp47LanguageTag == "zh-MO" ||
+                rBcp47LanguageTag == "zh-SG" ||
+                rBcp47LanguageTag == "zh-TW")
+            {
+                eRet = ea;
+            }
+        }
+    }
+    else if (0x202A <= codePoint && codePoint <= 0x202F)
+        eRet = cs;
+    else if (0x2030 <= codePoint && codePoint <= 0x2046)
+        eRet = latin;
+    else if (0x204A <= codePoint && codePoint <= 0x245F)
+        eRet = latin;
+    else if (0x2670 <= codePoint && codePoint <= 0x2671)
+        eRet = cs; // the quoted table assigns U+2670..U+2671 to cs, not latin
+    else if (0x27C0 <= codePoint && codePoint <= 0x2BFF)
+        eRet = latin;
+    else if (0x3099 <= codePoint && codePoint <= 0x309A)
+        eRet = ea;
+    // NOTE(review): 0xD835 is a UTF-16 high surrogate, so this branch only
+    // matches when the caller passes individual UTF-16 code units. A decoded
+    // code point in U+1D400..U+1D7FF falls through to ea - confirm what
+    // callers pass.
+    else if (0xD835 == codePoint)
+        eRet = latin;
+    else if (0xF000 <= codePoint && codePoint <= 0xF0FF)
+        eRet = sym;
+    else if (0xFB00 <= codePoint && codePoint <= 0xFB17)
+        eRet = latin;
+    else if (0xFB1D <= codePoint && codePoint <= 0xFB4F)
+        eRet = cs;
+    else if (0xFE50 <= codePoint && codePoint <= 0xFE6F)
+        eRet = latin;
+    return eRet;
+}
+
 }
 }
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits