source

Hossein (via logerrit) Fri, 07 Jan 2022 12:39:41 -0800

 i18nlangtag/source/isolang/mslangid.cxx |   21 +++++++++++++++++++++
 include/i18nlangtag/mslangid.hxx        |    3 +++
 sw/source/core/text/inftxt.cxx          |    4 +++-
 3 files changed, 27 insertions(+), 1 deletion(-)


New commits:
commit 151c56ed547490a99d912524c0e56b5d6d4a1939
Author:     Hossein <hoss...@libreoffice.org>
AuthorDate: Tue Jan 4 21:12:14 2022 +0100
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Fri Jan 7 21:38:54 2022 +0100

    tdf#146084 Don't warn for languages without hyphenation
    
    Upon opening a Writer document containing some languages that do not
    use hyphenation, an alert is shown with the text:
    
    'Missing hyphenation data Please install the hyphenation package for
    locale "ab_CD".'
    
    in which 'ab_CD' is the locale.
    
    This patch removes the warning for the following languages, which do
    not use hyphenation:
    
    * Arabic script languages (except Uighur)
     + Persian (Farsi)
     + Kashmiri
     + Kurdish (Central Kurdish and Southern Kurdish with Arabic script)
     + Punjabi
     + Sindhi
     + Malay
     + Somali
     + Swahili
     + Urdu
    "Words are not hyphenated in Arabic language text, however hyphenation
    is possible for Uighur text written in the Arabic script"
    https://www.w3.org/International/i18n-tests/results/word-break-shaping
    The list from MS documents is lengthy, but some of the languages
    were not available in LibreOffice, so they are omitted:
    https://docs.microsoft.com/en-us/typography/script-development/arabic
    There were languages like Hausa and Kanuri from Nigeria that use both
    Latin and Arabic script, but only Latin script was listed in the
    LibreOffice languages, so they were also omitted.
    
    * CJK languages
     + Japanese
     + Korean
     + Chinese
     + Yue Chinese
    "CJK languages differ from European languages in that there are no
    hyphenation rules"
    https://tug.org/TUGboat/tb25-0/cho.pdf
    
    * Vietnamese
    "In Vietnamese all words consist of single syllables, so they are
    often very short; hyphenation is not allowed at all."
    https://tug.org/TUGboat/tb29-1/tb91thanh-vntex.pdf
    Hyphenation has declined in Vietnamese orthography since 1975
    https://www.quora.com/When-did-hyphenation-decline-in-Vietnamese-orthography
    
    The fix for Japanese (tdf#143422) was previously done in:
    53d5555f13371252874ec962dee4643168d26780 and the functionality is
    preserved with the current patch.
    
    An alternate approach would be adding all the Unicode scripts,
    specifying the script for each language, and deciding based on the
    script (mostly) and not (only) the language.
    
    More information about the hyphenation usage of many scripts can be
    found in:
    https://r12a.github.io/scripts/
    
    This is the list of Unicode scripts:
    https://unicode.org/standard/supported.html
    https://en.wikipedia.org/wiki/Script_(Unicode)#List_of_scripts_in_Unicode
    
    Change-Id: I7d2b4ee55a0893d1f0d1f9cd3b7cc037a49589b6
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/126435
    Tested-by: Jenkins
    Reviewed-by: Eike Rathke <er...@redhat.com>

diff --git a/i18nlangtag/source/isolang/mslangid.cxx 
b/i18nlangtag/source/isolang/mslangid.cxx
index 71f6b7b49e66..ad062a8d3dcf 100644
--- a/i18nlangtag/source/isolang/mslangid.cxx
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -165,6 +165,27 @@ LanguageType MsLangId::resolveSystemLanguageByScriptType( 
LanguageType nLang, sa
     return nLang;
 }
 
+// static
+bool MsLangId::usesHyphenation(LanguageType nLang)
+{
+    if (primary(nLang).anyOf(
+            primary(LANGUAGE_ARABIC_PRIMARY_ONLY),
+            primary(LANGUAGE_FARSI),
+            primary(LANGUAGE_KASHMIRI),
+            primary(LANGUAGE_KURDISH_ARABIC_IRAQ),
+            primary(LANGUAGE_PUNJABI),
+            primary(LANGUAGE_SINDHI),
+            primary(LANGUAGE_USER_MALAY_ARABIC_MALAYSIA),
+            primary(LANGUAGE_SOMALI),
+            primary(LANGUAGE_SWAHILI),
+            primary(LANGUAGE_URDU_PAKISTAN))
+        || isCJK(nLang))
+    {
+        return false;
+    }
+    return true;
+}
+
 
 // static
 css::lang::Locale MsLangId::Conversion::convertLanguageToLocale(
diff --git a/include/i18nlangtag/mslangid.hxx b/include/i18nlangtag/mslangid.hxx
index 4966fde852f3..aee1f23eb816 100644
--- a/include/i18nlangtag/mslangid.hxx
+++ b/include/i18nlangtag/mslangid.hxx
@@ -126,6 +126,9 @@ public:
     static LanguageType resolveSystemLanguageByScriptType( LanguageType nLang, 
sal_Int16 nType );
 
 
+    /** Whether the language uses hyphenation. */
+    static bool usesHyphenation( LanguageType nLang );
+
     /** Whether locale has a Right-To-Left orientation for text. */
     static bool isRightToLeft( LanguageType nLang );
 
diff --git a/sw/source/core/text/inftxt.cxx b/sw/source/core/text/inftxt.cxx
index 14eb8b13c11d..a5e588ebad0b 100644
--- a/sw/source/core/text/inftxt.cxx
+++ b/sw/source/core/text/inftxt.cxx
@@ -66,6 +66,7 @@
 #include <vcl/gdimtf.hxx>
 #include <vcl/virdev.hxx>
 #include <vcl/gradient.hxx>
+#include <i18nlangtag/mslangid.hxx>
 
 using namespace ::com::sun::star;
 using namespace ::com::sun::star::linguistic2;
@@ -1446,7 +1447,8 @@ bool SwTextFormatInfo::IsHyphenate() const
 
     LanguageType eTmp = GetFont()->GetLanguage();
     // TODO: check for more ideographic langs w/o hyphenation as a concept
-    if ( LANGUAGE_DONTKNOW == eTmp || LANGUAGE_NONE == eTmp || 
LANGUAGE_JAPANESE == eTmp )
+    if ( LANGUAGE_DONTKNOW == eTmp || LANGUAGE_NONE == eTmp
+            || !MsLangId::usesHyphenation(eTmp) )
         return false;
 
     uno::Reference< XHyphenator > xHyph = ::GetHyphenator();

[Libreoffice-commits] core.git: i18nlangtag/source include/i18nlangtag sw/source

Reply via email to