Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package nuspell for openSUSE:Factory
checked in at 2021-01-18 11:26:07

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/nuspell (Old)
 and      /work/SRC/openSUSE:Factory/.nuspell.new.28504 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "nuspell" Mon Jan 18 11:26:07 2021 rev:4 rq:862717 version:4.2.0 Changes: -------- --- /work/SRC/openSUSE:Factory/nuspell/nuspell.changes 2020-11-29 12:19:08.777438895 +0100 +++ /work/SRC/openSUSE:Factory/.nuspell.new.28504/nuspell.changes 2021-01-18 11:26:12.988398614 +0100 @@ -1,0 +2,9 @@ +Tue Jan 12 21:07:52 UTC 2021 - andy great <andythe_gr...@pm.me> + +- Update to version 4.2.0. + * Deprecate functions that allowed non-Unicode encoding. In + particular, Dictionary::imbue() and Dictionary::imbue_utf8(). + * Completely remove dependency on Boost. The CLI tools were + refactored to use ICU directly. + +------------------------------------------------------------------- Old: ---- nuspell-4.1.0.tar.gz New: ---- nuspell-4.2.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ nuspell.spec ++++++ --- /var/tmp/diff_new_pack.6vyn1t/_old 2021-01-18 11:26:13.744400356 +0100 +++ /var/tmp/diff_new_pack.6vyn1t/_new 2021-01-18 11:26:13.748400365 +0100 @@ -1,7 +1,7 @@ # # spec file for package nuspell # -# Copyright (c) 2020 SUSE LLC +# Copyright (c) 2021 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ %define sonum 4 %define libname libnuspell Name: nuspell -Version: 4.1.0 +Version: 4.2.0 Release: 0 Summary: A spell checker library and command-line tool License: LGPL-3.0-or-later @@ -30,7 +30,6 @@ BuildRequires: cmake BuildRequires: doxygen BuildRequires: gcc-c++ -BuildRequires: libboost_locale-devel BuildRequires: libicu-devel BuildRequires: rubygem(%{rb_default_ruby_abi}:ronn) Requires: hunspell @@ -107,7 +106,7 @@ %files -n %{libname}%{sonum} %doc README.md CHANGELOG.md AUTHORS %license COPYING.LESSER COPYING -%{_libdir}/%{libname}.so.%{sonum}.1.0 +%{_libdir}/%{libname}.so.%{version} %exclude %{_datadir}/doc/nuspell/README.md %files devel ++++++ nuspell-4.1.0.tar.gz -> nuspell-4.2.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/.travis.yml new/nuspell-4.2.0/.travis.yml --- old/nuspell-4.1.0/.travis.yml 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/.travis.yml 2020-12-12 23:40:04.000000000 +0100 @@ -1,9 +1,9 @@ language: cpp -dist: bionic -os: -- linux -- osx +os: [ linux, osx ] + +dist: bionic +osx_image: xcode11.3 env: global: @@ -13,30 +13,25 @@ - CXXFLAGS=-fsanitize=address - CXXFLAGS=-fsanitize=undefined - CXXFLAGS="-Wall -Wextra -Werror" - - CONFIGURE_ARGS="-DBUILD_SHARED_LIBS=1 -DCMAKE_BUILD_TYPE=Release" + - CONFIGURE_ARGS="-DCMAKE_BUILD_TYPE=Release" matrix: include: -# set B2_ARGS to non-empty to use custom build Boost. 
B2_ARGS=" " is sufficient - os: linux - env: CXXFLAGS="-D_GLIBCXX_DEBUG -O1" B2_ARGS=define=_GLIBCXX_DEBUG + env: CXXFLAGS="-D_GLIBCXX_DEBUG -O1" - os: windows env: CTEST_ARGS="-C Debug" - os: windows - env: CONFIGURE_ARGS="-DBUILD_SHARED_LIBS=1" BUILD_ARGS="--config Release" CTEST_ARGS="-C Release" + env: BUILD_ARGS="--config Release" CTEST_ARGS="-C Release" + +addons: + apt: + packages: [ libicu-dev ] + homebrew: + packages: [ icu4c ] before_install: - | - if [ "$B2_ARGS" ]; then - cd ~ && - wget https://dl.bintray.com/boostorg/release/1.64.0/source/boost_1_64_0.tar.bz2 -O - | tar -xj && - cd - - fi -- | - if [ -z "$B2_ARGS" ] && [ "$TRAVIS_OS_NAME" = linux ]; then - sudo apt-get install -y libboost-locale-dev - fi -- | if [ "$TRAVIS_OS_NAME" = windows ]; then # This should be added to PATH as on local Windows Git Bash install, # but it isn't on travis. xz is there @@ -45,16 +40,6 @@ fi install: -# if we update to boost 1.67 we can add define=BOOST_LOCALE_HIDE_AUTO_PTR to b2 -- | - if [ "$B2_ARGS" ]; then - cd ~/boost_1_64_0 && - ./bootstrap.sh --with-libraries=locale --with-toolset="$CC" && - ./b2 -d0 -j 4 cxxflags=-std=c++14 $B2_ARGS && - cd - && - export BOOST_ROOT=~/boost_1_64_0 && - export LD_LIBRARY_PATH=$BOOST_ROOT/stage/lib - fi - | if [ "$TRAVIS_OS_NAME" = linux ]; then export CMAKE_BUILD_PARALLEL_LEVEL=4 @@ -62,7 +47,7 @@ - | if [ "$TRAVIS_OS_NAME" = osx ]; then export ICU_ROOT=$(brew --prefix icu4c) - BUILD_ARGS+=" -- -j 4" #parallel build + export CMAKE_BUILD_PARALLEL_LEVEL=4 fi - | if [ "$TRAVIS_OS_NAME" = windows ]; then @@ -77,13 +62,3 @@ - ctest $CTEST_ARGS after_failure: cat Testing/Temporary/LastTest.log - -addons: -# apt: -# packages: -# - libicu-dev -# - libboost-locale-dev - homebrew: - packages: - - icu4c - - boost diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/CHANGELOG.md new/nuspell-4.2.0/CHANGELOG.md --- old/nuspell-4.1.0/CHANGELOG.md 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/CHANGELOG.md 2020-12-12 23:40:04.000000000 +0100 @@ -6,16 +6,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.2.0] - 2020-12-12 +### Deprecated +- Deprecate functions that allowed non-Unicode encoding. In particular, + `Dictionary::imbue()` and `Dictionary::imbue_utf8()`. + +### Removed +- Completely remove dependency on Boost. The CLI tools were refactored to use + ICU directly. + ## [4.1.0] - 2020-11-19 -## Added +### Added - Add new API for finding dictionaries on the file-system. It is a set of free functions located in the file finder.hxx. -## Fixed +### Fixed - Improve searching for dictionaries on the file-system. Fix finding them on Fedora. Fixes #94. -## Deprecated +### Deprecated - Deprecate the old API for finding dictionaries, i.e. the class `Finder` in the file finder.hxx. 
@@ -175,7 +184,9 @@ - Spelling error detection (checking) is closely matching Hunspell - Support for spelling error correction (suggestions) -[Unreleased]: https://github.com/nuspell/nuspell/compare/v4.0.1...HEAD +[Unreleased]: https://github.com/nuspell/nuspell/compare/v4.2.0...HEAD +[4.2.0]: https://github.com/nuspell/nuspell/compare/v4.1.0...v4.2.0 +[4.1.0]: https://github.com/nuspell/nuspell/compare/v4.0.1...v4.1.0 [4.0.1]: https://github.com/nuspell/nuspell/compare/v4.0.0...v4.0.1 [4.0.0]: https://github.com/nuspell/nuspell/compare/v3.1.2...v4.0.0 [3.1.2]: https://github.com/nuspell/nuspell/compare/v3.1.1...v3.1.2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/CMakeLists.txt new/nuspell-4.2.0/CMakeLists.txt --- old/nuspell-4.1.0/CMakeLists.txt 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/CMakeLists.txt 2020-12-12 23:40:04.000000000 +0100 @@ -1,14 +1,13 @@ cmake_minimum_required(VERSION 3.8) -project(nuspell VERSION 4.1.0) +project(nuspell VERSION 4.2.0) set(PROJECT_HOMEPAGE_URL "https://nuspell.github.io/") -option(BUILD_SHARED_LIBS "Buils as shared library" ON) +option(BUILD_SHARED_LIBS "Build as shared library" ON) include(GNUInstallDirs) include(CMakePackageConfigHelpers) find_package(ICU REQUIRED COMPONENTS uc data) -find_package(Boost 1.48.0 REQUIRED COMPONENTS locale) get_directory_property(subproject PARENT_DIRECTORY) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/README.md new/nuspell-4.2.0/README.md --- old/nuspell-4.1.0/README.md 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/README.md 2020-12-12 23:40:04.000000000 +0100 @@ -34,7 +34,6 @@ Run-time (and build-time) dependencies: - ICU4C - - Boost Locale >= v1.48 (needed only for the CLI tool, not the library) Recommended tools for developers: qtcreator, ninja, clang-format, gdb, vim, doxygen. @@ -47,7 +46,7 @@ For Ubuntu and Debian: ```bash -sudo apt install git cmake libboost-locale-dev libicu-dev +sudo apt install git cmake libicu-dev ``` Then run the following commands inside the Nuspell directory: @@ -81,7 +80,7 @@ <!-- end list --> ```bash -brew install cmake icu4c boost +brew install cmake icu4c export ICU_ROOT=$(brew --prefix icu4c) ``` @@ -102,7 +101,7 @@ Visual Studio Build Tools. 2. Install Git for Windows and Cmake. 3. Install vcpkg in some folder, e.g. in `c:\vcpkg`. -4. With vcpkg install: icu, boost-locale\[icu\]. +4. With vcpkg install: icu. 5. Run the commands bellow. <!-- end list --> @@ -119,7 +118,7 @@ Download MSYS2, update everything and install the following packages: ```bash -pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-boost \ +pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-icu \ mingw-w64-x86_64-cmake ``` @@ -144,7 +143,7 @@ Install the following required packages ```bash -pkg cmake icu boost-libs catch +pkg cmake icu catch ``` Then run the standard cmake and make as on Linux. See above. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/docs/Third-party_licenses new/nuspell-4.2.0/docs/Third-party_licenses --- old/nuspell-4.1.0/docs/Third-party_licenses 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/docs/Third-party_licenses 2020-12-12 23:40:04.000000000 +0100 @@ -93,7 +93,7 @@ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-Boost license: +Catch2 license: Boost Software License - Version 1.0 - August 17th, 2003 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/src/nuspell/CMakeLists.txt new/nuspell-4.2.0/src/nuspell/CMakeLists.txt --- old/nuspell-4.1.0/src/nuspell/CMakeLists.txt 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/src/nuspell/CMakeLists.txt 2020-12-12 23:40:04.000000000 +0100 @@ -33,7 +33,7 @@ RUNTIME_OUTPUT_NAME nuspell) target_compile_definitions(nuspell-bin PRIVATE PROJECT_VERSION=\"${PROJECT_VERSION}\") -target_link_libraries(nuspell-bin nuspell Boost::locale) +target_link_libraries(nuspell-bin nuspell) if (BUILD_SHARED_LIBS AND WIN32) # This should be PRE_LINK (or PRE_BUILD), so Vcpkg's POST_BUILD # step (see VCPKG_APPLOCAL_DEPS) that copies dll can pick up nuspell.dll diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/src/nuspell/dictionary.cxx new/nuspell-4.2.0/src/nuspell/dictionary.cxx --- old/nuspell-4.1.0/src/nuspell/dictionary.cxx 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/src/nuspell/dictionary.cxx 2020-12-12 23:40:04.000000000 +0100 @@ -3107,6 +3107,11 @@ * convert the strings from the external locale to UTF-32 on non-Windows * platforms, and to UTF-16 on Windows. * + * @deprecated You should always feed dictionary with words encoded in UTF-8, + * and you should not use this function to set other encodings. The recommened + * way to get words out of text is via the algorithm known as Unicode text + * segmentation which only works on text encoded in Unicode anyway. + * * @param loc locale object with valid codecvt<wchar_t, char, mbstate_t> */ auto Dictionary::imbue(const locale& loc) -> void @@ -3119,6 +3124,8 @@ * @brief Sets external (public API) encoding to UTF-8 * * Call this only if you used imbue() and want to revert it to UTF-8. 
+ * + * @deprecated see imbue() */ auto Dictionary::imbue_utf8() -> void { external_locale_known_utf8 = true; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/src/nuspell/dictionary.hxx new/nuspell-4.2.0/src/nuspell/dictionary.hxx --- old/nuspell-4.1.0/src/nuspell/dictionary.hxx 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/src/nuspell/dictionary.hxx 2020-12-12 23:40:04.000000000 +0100 @@ -407,8 +407,8 @@ -> Dictionary; auto static load_from_path( const std::string& file_path_without_extension) -> Dictionary; - auto imbue(const std::locale& loc) -> void; - auto imbue_utf8() -> void; + [[deprecated]] auto imbue(const std::locale& loc) -> void; + [[deprecated]] auto imbue_utf8() -> void; auto spell(std::string_view word) const -> bool; auto suggest(std::string_view word, std::vector<std::string>& out) const -> void; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/src/nuspell/main.cxx new/nuspell-4.2.0/src/nuspell/main.cxx --- old/nuspell-4.1.0/src/nuspell/main.cxx 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/src/nuspell/main.cxx 2020-12-12 23:40:04.000000000 +0100 @@ -19,16 +19,27 @@ #include "dictionary.hxx" #include "finder.hxx" -#include <boost/locale.hpp> +#include <cassert> #include <fstream> #include <iomanip> #include <iostream> +#include <unicode/brkiter.h> +#include <unicode/ucnv.h> #if defined(__MINGW32__) || defined(__unix__) || defined(__unix) || \ (defined(__APPLE__) && defined(__MACH__)) #include <getopt.h> #include <unistd.h> #endif +#ifdef _POSIX_VERSION +#include <langinfo.h> +#endif +#ifdef _WIN32 +#include <io.h> +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif // manually define if not supplied by the build system #ifndef PROJECT_VERSION @@ -280,13 +291,57 @@ } } -auto process_word( - Mode mode, const Dictionary& dic, const string& line, - string::const_iterator b, string::const_iterator c, string& word, - vector<pair<string::const_iterator, string::const_iterator>>& wrong_words, - vector<string>& suggestions, ostream& out) +auto to_utf8(string_view source, string& dest, UConverter* ucnv, + UErrorCode& uerr) +{ + dest.resize(dest.capacity()); + auto len = ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + dest.resize(len); + if (uerr == U_BUFFER_OVERFLOW_ERROR) { + uerr = U_ZERO_ERROR; + ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + } +} + +auto from_utf8(string_view source, string& dest, UConverter* ucnv, + UErrorCode& uerr) +{ + dest.resize(dest.capacity()); + auto len = + ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + dest.resize(len); + if (uerr == U_BUFFER_OVERFLOW_ERROR) { + uerr = U_ZERO_ERROR; + ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + } +} + +auto to_unicode_string(string_view source, icu::UnicodeString& dest, + UConverter* ucnv, UErrorCode& uerr) +{ + auto buf = dest.getBuffer(-1); + auto len = ucnv_toUChars(ucnv, buf, dest.getCapacity(), source.data(), + source.size(), &uerr); + if (uerr == U_BUFFER_OVERFLOW_ERROR) { + uerr = U_ZERO_ERROR; + dest.releaseBuffer(0); + buf = dest.getBuffer(len); + if (!buf) + throw bad_alloc(); + len = ucnv_toUChars(ucnv, buf, dest.getCapacity(), + source.data(), source.size(), &uerr); + } + dest.releaseBuffer(len); +} + +auto 
process_word(Mode mode, const Dictionary& dic, string_view word, + size_t pos_word, vector<string_view>& wrong_words, + vector<string>& suggestions, ostream& out) { - word.assign(b, c); auto correct = dic.spell(word); switch (mode) { case DEFAULT_MODE: { @@ -295,7 +350,6 @@ break; } dic.suggest(word, suggestions); - auto pos_word = b - begin(line); if (suggestions.empty()) { out << "# " << word << ' ' << pos_word << '\n'; break; @@ -319,18 +373,66 @@ case MISSPELLED_LINES_MODE: case CORRECT_LINES_MODE: if (!correct) - wrong_words.emplace_back(b, c); + wrong_words.push_back(word); break; default: break; } } -auto process_line( - Mode mode, const string& line, - const vector<pair<string::const_iterator, string::const_iterator>>& - wrong_words, - ostream& out) +auto process_word_other_encoding(Mode mode, const Dictionary& dic, + string_view word, string_view u8word, + size_t pos_word, + vector<string_view>& wrong_words, + vector<string>& suggestions, ostream& out, + UConverter* ucnv, UErrorCode& uerr) +{ + auto correct = dic.spell(u8word); + switch (mode) { + case DEFAULT_MODE: { + if (correct) { + out << "*\n"; + break; + } + dic.suggest(u8word, suggestions); + if (suggestions.empty()) { + out << "# " << word << ' ' << pos_word << '\n'; + break; + } + out << "& " << word << ' ' << suggestions.size() << ' ' + << pos_word << ": "; + auto sug_in_encoding = string(); + from_utf8(suggestions[0], sug_in_encoding, ucnv, uerr); + out << sug_in_encoding; + for_each(begin(suggestions) + 1, end(suggestions), + [&](const string& u8sug) { + out << ", "; + from_utf8(u8sug, sug_in_encoding, ucnv, uerr); + out << sug_in_encoding; + }); + out << '\n'; + break; + } + case MISSPELLED_WORDS_MODE: + if (!correct) + out << word << '\n'; + break; + case CORRECT_WORDS_MODE: + if (correct) + out << word << '\n'; + break; + case MISSPELLED_LINES_MODE: + case CORRECT_LINES_MODE: + if (!correct) + wrong_words.push_back(word); + break; + default: + break; + } +} + +auto finish_line(Mode mode, const string& line, + const vector<string_view>& wrong_words, ostream& out) { switch (mode) { case DEFAULT_MODE: @@ -350,81 +452,140 @@ } auto whitespace_segmentation_loop(istream& in, ostream& out, - const Dictionary& dic, Mode mode) + const Dictionary& dic, Mode mode, + UConverter* ucnv, UErrorCode& uerr) { auto line = string(); - auto word = string(); auto suggestions = vector<string>(); - using Str_Iter = string::const_iterator; - auto wrong_words = vector<pair<Str_Iter, Str_Iter>>(); + auto wrong_words = vector<string_view>(); auto loc = in.getloc(); - auto line_num = size_t(0); auto& facet = use_facet<ctype<char>>(loc); auto isspace = [&](char c) { return facet.is(facet.space, c); }; + auto u8word = string(); + auto is_utf8 = ucnv_getType(ucnv) == UCNV_UTF8; + while (getline(in, line)) { - ++line_num; wrong_words.clear(); for (auto a = begin(line); a != end(line);) { - auto b = find_if_not(a, end(line), isspace); - if (b == end(line)) + a = find_if_not(a, end(line), isspace); + if (a == end(line)) break; - auto c = find_if(b, end(line), isspace); - - process_word(mode, dic, line, b, c, word, wrong_words, - suggestions, out); - - a = c; + auto b = find_if(a, end(line), isspace); + auto word = string_view(&*a, distance(a, b)); + auto pos_word = distance(begin(line), a); + if (is_utf8) { + process_word(mode, dic, word, pos_word, + wrong_words, suggestions, out); + } + else { + to_utf8(word, u8word, ucnv, uerr); + process_word_other_encoding( + mode, dic, word, u8word, pos_word, + wrong_words, suggestions, out, ucnv, uerr); + } + 
a = b; } - process_line(mode, line, wrong_words, out); + finish_line(mode, line, wrong_words, out); } } -auto unicode_segentation_loop(istream& in, ostream& out, const Dictionary& dic, - Mode mode) +auto is_word_break(int32_t typ) { - namespace b = boost::locale::boundary; - auto line = string(); - auto word = string(); - auto suggestions = vector<string>(); - using Str_Iter = string::const_iterator; - auto wrong_words = vector<pair<Str_Iter, Str_Iter>>(); - auto loc = in.getloc(); - auto line_num = size_t(0); - auto index = b::ssegment_index(); - index.rule(b::word_any); - auto line_stream = istringstream(); - while (getline(in, line)) { - ++line_num; - index.map(b::word, begin(line), end(line), loc); - wrong_words.clear(); - auto a = cbegin(line); - for (auto& segment : index) { - auto b = begin(segment); - auto c = end(segment); + return (UBRK_WORD_NUMBER <= typ && typ < UBRK_WORD_NUMBER_LIMIT) || + (UBRK_WORD_LETTER <= typ && typ < UBRK_WORD_LETTER_LIMIT) || + (UBRK_WORD_KANA <= typ && typ < UBRK_WORD_KANA_LIMIT) || + (UBRK_WORD_IDEO <= typ && typ < UBRK_WORD_IDEO_LIMIT); +} - process_word(mode, dic, line, b, c, word, wrong_words, +auto segment_line_utf8(Mode mode, const Dictionary& dic, const string& line, + UText* utext, icu::BreakIterator* ubrkiter, + UErrorCode& uerr, vector<string>& suggestions, + vector<string_view>& wrong_words, ostream& out) +{ + utext_openUTF8(utext, line.data(), line.size(), &uerr); + ubrkiter->setText(utext, uerr); + for (auto i = ubrkiter->first(), prev = 0; i != ubrkiter->DONE; + prev = i, i = ubrkiter->next()) { + auto typ = ubrkiter->getRuleStatus(); + if (is_word_break(typ)) { + auto word = string_view(line).substr(prev, i - prev); + process_word(mode, dic, word, prev, wrong_words, suggestions, out); - - a = c; } - process_line(mode, line, wrong_words, out); } + finish_line(mode, line, wrong_words, out); + assert(U_SUCCESS(uerr)); } -namespace std { -ostream& operator<<(ostream& out, const locale& loc) +auto segment_line_generic(Mode mode, const Dictionary& dic, const string& line, + icu::UnicodeString& uline, UConverter* ucnv, + icu::BreakIterator* ubrkiter, UErrorCode& uerr, + string& u8word, vector<string>& suggestions, + vector<string_view>& wrong_words, ostream& out) { - if (has_facet<boost::locale::info>(loc)) { - auto& f = use_facet<boost::locale::info>(loc); - out << "name=" << f.name() << ", lang=" << f.language() - << ", country=" << f.country() << ", enc=" << f.encoding(); + to_unicode_string(line, uline, ucnv, uerr); + ubrkiter->setText(uline); + size_t orig_prev = 0, orig_i = 0; + auto src = line.c_str(); + auto src_end = src + line.size(); + + ucnv_resetToUnicode(ucnv); + for (auto i = ubrkiter->first(), prev = 0; i != ubrkiter->DONE; + prev = i, i = ubrkiter->next(), orig_prev = orig_i) { + + for (auto j = prev; j != i; ++j) { + auto cp = ucnv_getNextUChar(ucnv, &src, src_end, &uerr); + + // U_IS_SURROGATE(uline[j]) or + // U_IS_LEAD(uline[j]) can work too + j += !U_IS_BMP(cp); + } + orig_i = distance(line.c_str(), src); + + auto typ = ubrkiter->getRuleStatus(); + if (is_word_break(typ)) { + auto uword = uline.tempSubStringBetween(prev, i); + u8word.clear(); + uword.toUTF8String(u8word); + auto word = string_view(line).substr( + orig_prev, orig_i - orig_prev); + process_word_other_encoding( + mode, dic, word, u8word, orig_prev, wrong_words, + suggestions, out, ucnv, uerr); + } } - else { - out << loc.name(); + finish_line(mode, line, wrong_words, out); + assert(U_SUCCESS(uerr)); +} + +auto unicode_segentation_loop(istream& in, ostream& 
out, const Dictionary& dic, + Mode mode, UConverter* ucnv, UErrorCode& uerr) +{ + auto line = string(); + auto suggestions = vector<string>(); + auto wrong_words = vector<string_view>(); + + // TODO: try to use Locale constructed from dictionary name. + auto ubrkiter = unique_ptr<icu::BreakIterator>( + icu::BreakIterator::createWordInstance(icu::Locale(), uerr)); + auto utext = icu::LocalUTextPointer( + utext_openUTF8(nullptr, line.data(), line.size(), &uerr)); + auto uline = icu::UnicodeString(); + auto u8word = string(); + auto is_utf8 = ucnv_getType(ucnv) == UCNV_UTF8; + + while (getline(in, line)) { + wrong_words.clear(); + if (is_utf8) + segment_line_utf8(mode, dic, line, utext.getAlias(), + ubrkiter.get(), uerr, suggestions, + wrong_words, out); + else + segment_line_generic(mode, dic, line, uline, ucnv, + ubrkiter.get(), uerr, u8word, + suggestions, wrong_words, out); } - return out; } -} // namespace std int main(int argc, char* argv[]) { @@ -432,30 +593,6 @@ ios_base::sync_with_stdio(false); auto args = Args_t(argc, argv); - if (args.mode == ERROR_MODE) { - cerr << "Invalid (combination of) arguments, try '" - << args.program_name << " --help' for more information\n"; - return 1; - } - boost::locale::generator gen; - auto loc = std::locale(); - try { - if (args.encoding.empty()) - loc = gen(""); - else - loc = gen("en_US." + args.encoding); - } - catch (const boost::locale::conv::invalid_charset_error& e) { - cerr << e.what() << '\n'; -#ifdef _POSIX_VERSION - cerr << "Nuspell error: see `locale -m` for supported " - "encodings.\n"; -#endif - return 1; - } - cin.imbue(loc); - cout.imbue(loc); - switch (args.mode) { case HELP_MODE: print_help(args.program_name); @@ -463,26 +600,64 @@ case VERSION_MODE: print_version(); return 0; + case ERROR_MODE: + cerr << "Invalid (combination of) arguments, try '" + << args.program_name << " --help' for more information\n"; + return 1; default: break; } - clog << "INFO: I/O locale " << loc << '\n'; - auto f = Dict_Finder_For_CLI_Tool(); - if (args.mode == LIST_DICTIONARIES_MODE) { list_dictionaries(f); return 0; } + char* loc_str = nullptr; +#ifdef _WIN32 + loc_str = setlocale(LC_CTYPE, nullptr); // will return "C" + + /* On Windows, the console is a buggy thing. If the default C locale is + active, then the encoding of the strings gotten from C or C++ stdio + (fgets, scanf, cin) is GetConsoleCP(). Stdout accessed via standard + functions (printf, cout) expects encoding of GetConsoleOutputCP() which + is the same as GetConsoleCP() unless manually changed. By default both + are the active OEM encoding, unless changed with the command chcp, or by + calling the Set functions. + + If we call setlocale(LC_CTYPE, ""), or let's say setlocale(LC_CTYPE, + ".1251"), then stdin will still return in the encoding GetConsoleCP(), + but stdout functions like printf now will expect a different encoding, + the one set via setlocale. Because of this mess don't change locale with + setlocale on Windows. + + When stdin or stout are redirected from/to file or another terminal like + the one in MSYS2, they are read/written as-is. Then we will assume UTF-8 + encoding. 
*/ +#else + loc_str = setlocale(LC_CTYPE, ""); + if (!loc_str) { + clog << "WARNING: Invalid locale string, fall back to \"C\".\n"; + loc_str = setlocale(LC_CTYPE, nullptr); // will return "C" + } +#endif + auto loc_str_sv = string_view(loc_str); + if (args.encoding.empty()) { +#if _POSIX_VERSION + auto enc_str = nl_langinfo(CODESET); + args.encoding = enc_str; +#elif _WIN32 + if (_isatty(_fileno(stdin)) || _isatty(_fileno(stdout))) + args.encoding = "cp" + to_string(GetConsoleCP()); + else + args.encoding = "UTF-8"; +#endif + } + clog << "INFO: Locale LC_CTYPE=" << loc_str_sv + << ", Used encoding=" << args.encoding << '\n'; if (args.dictionary.empty()) { // infer dictionary from locale - auto& info = use_facet<boost::locale::info>(loc); - args.dictionary = info.language(); - auto c = info.country(); - if (!c.empty()) { - args.dictionary += '_'; - args.dictionary += c; - } + auto idx = min(loc_str_sv.find('.'), loc_str_sv.find('@')); + args.dictionary = loc_str_sv.substr(0, idx); } if (args.dictionary.empty()) { cerr << "No dictionary provided and can not infer from OS " @@ -502,13 +677,31 @@ cerr << e.what() << '\n'; return 1; } - if (!use_facet<boost::locale::info>(loc).utf8()) - dic.imbue(loc); + // ICU reports all types of errors, logic errors and runtime errors + // using this enum. We should not check for logic errors, they should + // not happend. Optionally, only assert that they are not there can be + // used. We should check for runtime errors. + // The encoding conversion is a common case where runtime error can + // happen, but by default ICU uses Unicode replacement character on + // errors and reprots success. This can be changed, but there is no need + // for that. + auto uerr = U_ZERO_ERROR; + auto enc_cstr = args.encoding.c_str(); + if (args.encoding.empty()) { + enc_cstr = nullptr; + clog << "WARNING: using default ICU encoding converter for IO" + << endl; + } + auto ucnv = icu::LocalUConverterPointer(ucnv_open(enc_cstr, &uerr)); + if (U_FAILURE(uerr)) { + cerr << "ERROR: Invalid encoding " << args.encoding << ".\n"; + return 1; + } auto loop_function = unicode_segentation_loop; if (args.whitespace_segmentation) loop_function = whitespace_segmentation_loop; if (args.files.empty()) { - loop_function(cin, cout, dic, args.mode); + loop_function(cin, cout, dic, args.mode, ucnv.getAlias(), uerr); } else { for (auto& file_name : args.files) { @@ -517,8 +710,8 @@ cerr << "Can't open " << file_name << '\n'; return 1; } - in.imbue(loc); - loop_function(in, cout, dic, args.mode); + loop_function(in, cout, dic, args.mode, ucnv.getAlias(), + uerr); } } return 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/tests/CMakeLists.txt new/nuspell-4.2.0/tests/CMakeLists.txt --- old/nuspell-4.1.0/tests/CMakeLists.txt 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/tests/CMakeLists.txt 2020-12-12 23:40:04.000000000 +0100 @@ -4,7 +4,7 @@ structures_test.cxx utils_test.cxx catch_main.cxx) -target_link_libraries(unit_test nuspell Catch2::Catch2 Boost::locale) +target_link_libraries(unit_test nuspell Catch2::Catch2) if (MSVC) target_compile_options(unit_test PRIVATE "/utf-8") # Consider doing this for all the other targets by setting this flag @@ -15,7 +15,7 @@ target_link_libraries(legacy_test nuspell) add_executable(verify verify.cxx) -target_link_libraries(verify nuspell hunspell Boost::locale) +target_link_libraries(verify nuspell hunspell) if (BUILD_SHARED_LIBS AND WIN32) add_custom_command(TARGET verify PRE_LINK diff 
-urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/tests/utils_test.cxx new/nuspell-4.2.0/tests/utils_test.cxx --- old/nuspell-4.1.0/tests/utils_test.cxx 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/tests/utils_test.cxx 2020-12-12 23:40:04.000000000 +0100 @@ -18,7 +18,6 @@ #include <nuspell/utils.hxx> -#include <boost/locale.hpp> #include <catch2/catch.hpp> using namespace std; @@ -118,44 +117,19 @@ TEST_CASE("to_wide", "[locale_utils]") { - auto gen = boost::locale::generator(); - gen.characters(boost::locale::wchar_t_facet); - gen.categories(boost::locale::codepage_facet); - auto loc = gen("en_US.UTF-8"); - auto in = string("\U0010FFFF ??"); - CHECK(L"\U0010FFFF ??" == to_wide(in, loc)); - - in = "\U00011D59\U00011D59\U00011D59\U00011D59\U00011D59"; - auto out = wstring(); - auto exp = L"\U00011D59\U00011D59\U00011D59\U00011D59\U00011D59"; - CHECK(true == to_wide(in, loc, out)); - CHECK(exp == out); - - loc = locale(locale::classic(), new latin1_codecvt()); - in = "abcd\xDF"; + auto loc = locale(locale::classic(), new latin1_codecvt()); + auto in = "abcd\xDF"; CHECK(L"abcd??" == to_wide(in, loc)); } TEST_CASE("to_narrow", "[locale_utils]") { - auto gen = boost::locale::generator(); - gen.characters(boost::locale::wchar_t_facet); - gen.categories(boost::locale::codepage_facet); - auto loc = gen("en_US.UTF-8"); - auto in = wstring(L"\U0010FFFF ??"); - CHECK("\U0010FFFF ??" == to_narrow(in, loc)); - - in = L"\U00011D59\U00011D59\U00011D59\U00011D59\U00011D59"; - auto out = string(); - CHECK(true == to_narrow(in, out, loc)); - CHECK("\U00011D59\U00011D59\U00011D59\U00011D59\U00011D59" == out); - - loc = locale(locale::classic(), new latin1_codecvt()); - in = L"abcd??"; + auto loc = locale(locale::classic(), new latin1_codecvt()); + auto in = L"abcd??"; CHECK("abcd\xDF" == to_narrow(in, loc)); in = L"\U00011D59\U00011D59\U00011D59\U00011D59\U00011D59"; - out = string(); + auto out = string(); CHECK(false == to_narrow(in, out, loc)); CHECK(all_of(begin(out), end(out), [](auto c) { return c == '?'; })); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nuspell-4.1.0/tests/verify.cxx new/nuspell-4.2.0/tests/verify.cxx --- old/nuspell-4.1.0/tests/verify.cxx 2020-11-19 11:56:49.000000000 +0100 +++ new/nuspell-4.2.0/tests/verify.cxx 2020-12-12 23:40:04.000000000 +0100 @@ -16,16 +16,15 @@ * along with Nuspell. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <boost/locale.hpp> #include <hunspell/hunspell.hxx> #include <nuspell/dictionary.hxx> #include <nuspell/finder.hxx> -#include <nuspell/utils.hxx> #include <chrono> #include <fstream> #include <iomanip> #include <iostream> +#include <unicode/ucnv.h> #if defined(__MINGW32__) || defined(__unix__) || defined(__unix) || \ (defined(__APPLE__) && defined(__MACH__)) @@ -33,6 +32,7 @@ #include <unistd.h> #endif #ifdef _POSIX_VERSION +#include <langinfo.h> #include <sys/resource.h> #include <sys/time.h> #endif @@ -205,31 +205,84 @@ #endif } -auto normal_loop(istream& in, ostream& out, Dictionary& dic, Hunspell& hun, - locale& hloc, bool print_false = false, bool test_sugs = false) +auto to_utf8(string_view source, string& dest, UConverter* ucnv, + UErrorCode& uerr) { + dest.resize(dest.capacity()); + auto len = ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + dest.resize(len); + if (uerr == U_BUFFER_OVERFLOW_ERROR) { + uerr = U_ZERO_ERROR; + ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + } +} + +auto from_utf8(string_view source, string& dest, UConverter* ucnv, + UErrorCode& uerr) +{ + dest.resize(dest.capacity()); + auto len = + ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + dest.resize(len); + if (uerr == U_BUFFER_OVERFLOW_ERROR) { + uerr = U_ZERO_ERROR; + ucnv_fromAlgorithmic(ucnv, UCNV_UTF8, dest.data(), dest.size(), + source.data(), source.size(), &uerr); + } +} + +auto normal_loop(const Args_t& args, const Dictionary& dic, Hunspell& hun, + istream& in, ostream& out) +{ + auto print_false = args.print_false; + auto test_sugs = args.sugs; auto word = string(); - auto wide_word = wstring(); - auto narrow_word = string(); - // total number of words + auto u8_buffer = string(); + auto hun_word = string(); auto total = 0; - // total number of words with identical spelling correctness auto true_pos = 0; auto true_neg = 0; auto false_pos = 0; auto false_neg = 0; - // store cpu time for Hunspell and Nuspell auto duration_hun = chrono::high_resolution_clock::duration(); auto duration_nu = duration_hun; auto in_loc = in.getloc(); + + auto uerr = U_ZERO_ERROR; + auto io_cnv = icu::LocalUConverterPointer( + ucnv_open(args.encoding.c_str(), &uerr)); + if (U_FAILURE(uerr)) + throw runtime_error("Invalid io encoding"); + auto hun_enc = + nuspell::Encoding(hun.get_dict_encoding()).value_or_default(); + auto hun_cnv = + icu::LocalUConverterPointer(ucnv_open(hun_enc.c_str(), &uerr)); + if (U_FAILURE(uerr)) + throw runtime_error("Invalid hun encoding"); + auto io_is_utf8 = ucnv_getType(io_cnv.getAlias()) == UCNV_UTF8; + auto hun_is_utf8 = ucnv_getType(hun_cnv.getAlias()) == UCNV_UTF8; + // need to take entine line here, not `in >> word` while (getline(in, word)) { + auto u8_word = string_view(); auto tick_a = chrono::high_resolution_clock::now(); - auto res_nu = dic.spell(word); + if (io_is_utf8) { + u8_word = word; + } + else { + to_utf8(word, u8_buffer, io_cnv.getAlias(), uerr); + u8_word = u8_buffer; + } + auto res_nu = dic.spell(u8_word); auto tick_b = chrono::high_resolution_clock::now(); - to_wide(word, in_loc, wide_word); - to_narrow(wide_word, narrow_word, hloc); - auto res_hun = hun.spell(narrow_word); + if (hun_is_utf8) + hun_word = u8_word; + else + from_utf8(u8_word, hun_word, hun_cnv.getAlias(), uerr); + auto res_hun = hun.spell(hun_word); auto tick_c = chrono::high_resolution_clock::now(); duration_nu += tick_b - 
tick_a; duration_hun += tick_c - tick_b; @@ -260,7 +313,7 @@ auto nus_sugs = vector<string>(); auto hun_sugs = vector<string>(); dic.suggest(word, nus_sugs); - hun.suggest(narrow_word); + hun.suggest(hun_word); } } out << "Total Words " << total << '\n'; @@ -281,51 +334,12 @@ out << "Speedup Rate " << speedup << '\n'; } -namespace std { -ostream& operator<<(ostream& out, const locale& loc) -{ - if (has_facet<boost::locale::info>(loc)) { - auto& f = use_facet<boost::locale::info>(loc); - out << "name=" << f.name() << ", lang=" << f.language() - << ", country=" << f.country() << ", enc=" << f.encoding(); - } - else { - out << loc.name(); - } - return out; -} -} // namespace std - int main(int argc, char* argv[]) { // May speed up I/O. After this, don't use C printf, scanf etc. ios_base::sync_with_stdio(false); auto args = Args_t(argc, argv); - if (args.mode == ERROR_MODE) { - cerr << "Invalid (combination of) arguments, try '" - << args.program_name << " --help' for more information\n"; - return 1; - } - boost::locale::generator gen; - auto loc = std::locale(); - try { - if (args.encoding.empty()) - loc = gen(""); - - else - loc = gen("en_US." + args.encoding); - } - catch (const boost::locale::conv::invalid_charset_error& e) { - cerr << e.what() << '\n'; -#ifdef _POSIX_VERSION - cerr << "Nuspell error: see `locale -m` for supported " - "encodings.\n"; -#endif - return 1; - } - cin.imbue(loc); - cout.imbue(loc); switch (args.mode) { case HELP_MODE: @@ -334,22 +348,34 @@ case VERSION_MODE: print_version(); return 0; + case ERROR_MODE: + cerr << "Invalid (combination of) arguments, try '" + << args.program_name << " --help' for more information\n"; + return 1; default: break; } - clog << "INFO: I/O locale " << loc << '\n'; - auto f = Dict_Finder_For_CLI_Tool(); + auto loc_str = setlocale(LC_CTYPE, ""); + if (!loc_str) { + clog << "WARNING: Invalid locale string, fall back to \"C\".\n"; + loc_str = setlocale(LC_CTYPE, nullptr); // will return "C" + } + auto loc_str_sv = string_view(loc_str); + if (args.encoding.empty()) { +#if _POSIX_VERSION + auto enc_str = nl_langinfo(CODESET); + args.encoding = enc_str; +#elif _WIN32 +#endif + } + clog << "INFO: Locale LC_CTYPE=" << loc_str_sv + << ", Used encoding=" << args.encoding << '\n'; if (args.dictionary.empty()) { // infer dictionary from locale - auto& info = use_facet<boost::locale::info>(loc); - args.dictionary = info.language(); - auto c = info.country(); - if (!c.empty()) { - args.dictionary += '_'; - args.dictionary += c; - } + auto idx = min(loc_str_sv.find('.'), loc_str_sv.find('@')); + args.dictionary = loc_str_sv.substr(0, idx); } if (args.dictionary.empty()) { cerr << "No dictionary provided and can not infer from OS " @@ -361,8 +387,8 @@ return 1; } clog << "INFO: Pointed dictionary " << filename << ".{dic,aff}\n"; - auto dic = Dictionary(); auto peak_ram_a = get_peak_ram_usage(); + auto dic = Dictionary(); try { dic = Dictionary::load_from_path(filename); } @@ -370,22 +396,16 @@ cerr << e.what() << '\n'; return 1; } - if (!use_facet<boost::locale::info>(loc).utf8()) - dic.imbue(loc); auto nuspell_ram = get_peak_ram_usage() - peak_ram_a; - auto aff_name = filename + ".aff"; auto dic_name = filename + ".dic"; peak_ram_a = get_peak_ram_usage(); Hunspell hun(aff_name.c_str(), dic_name.c_str()); auto hunspell_ram = get_peak_ram_usage() - peak_ram_a; - auto hun_loc = gen( - "en_US." 
+ Encoding(hun.get_dict_encoding()).value_or_default()); cout << "Nuspell peak RAM usage: " << nuspell_ram << "kB\n" << "Hunspell peak RAM usage: " << hunspell_ram << "kB\n"; if (args.files.empty()) { - normal_loop(cin, cout, dic, hun, hun_loc, args.print_false, - args.sugs); + normal_loop(args, dic, hun, cin, cout); } else { for (auto& file_name : args.files) { @@ -395,8 +415,7 @@ return 1; } in.imbue(cin.getloc()); - normal_loop(in, cout, dic, hun, hun_loc, - args.print_false); + normal_loop(args, dic, hun, in, cout); } } return 0;
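
For downstream users adjusting to this release: with Dictionary::imbue() and
Dictionary::imbue_utf8() deprecated, the refactored CLI above converts any
non-UTF-8 input to UTF-8 with ICU before calling the library. Below is a
minimal, self-contained sketch of that pattern, not part of Nuspell's public
API: the to_utf8() helper mirrors the one added to src/nuspell/main.cxx, while
the "ISO-8859-1" converter name, the sample word, and the dictionary path
/usr/share/hunspell/en_US are placeholders chosen for illustration only.

    // Sketch: spell-check Latin-1 input against a UTF-8-only Nuspell dictionary.
    #include <nuspell/dictionary.hxx>
    #include <unicode/ucnv.h>

    #include <iostream>
    #include <string>
    #include <string_view>
    #include <vector>

    // Convert `source` (in the encoding of `ucnv`) to UTF-8. Two-pass pattern:
    // if the first conversion overflows the buffer, resize to the reported
    // length and convert again, as in the helper introduced in main.cxx.
    static void to_utf8(std::string_view source, std::string& dest,
                        UConverter* ucnv, UErrorCode& uerr)
    {
            dest.resize(dest.capacity());
            auto len = ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(),
                                          dest.size(), source.data(),
                                          source.size(), &uerr);
            dest.resize(len);
            if (uerr == U_BUFFER_OVERFLOW_ERROR) {
                    uerr = U_ZERO_ERROR;
                    ucnv_toAlgorithmic(UCNV_UTF8, ucnv, dest.data(),
                                       dest.size(), source.data(),
                                       source.size(), &uerr);
            }
    }

    int main()
    {
            auto uerr = U_ZERO_ERROR;
            // Converter for the legacy input encoding (placeholder name).
            icu::LocalUConverterPointer cnv(ucnv_open("ISO-8859-1", &uerr));
            if (U_FAILURE(uerr))
                    return 1;

            // load_from_path() may throw on a missing dictionary; error
            // handling is omitted to keep the sketch short.
            auto dic = nuspell::Dictionary::load_from_path(
                "/usr/share/hunspell/en_US");

            auto latin1_word = std::string("caf\xE9"); // "café" in Latin-1
            auto utf8_word = std::string();
            to_utf8(latin1_word, utf8_word, cnv.getAlias(), uerr);

            if (dic.spell(utf8_word)) {
                    std::cout << "correct\n";
            }
            else {
                    auto sugs = std::vector<std::string>();
                    dic.suggest(utf8_word, sugs);
                    std::cout << sugs.size() << " suggestions\n";
            }
            return 0;
    }

Building the sketch needs ICU4C and the installed nuspell headers/library; a
plausible (distribution-dependent) command line is
g++ -std=c++17 example.cxx $(pkg-config --cflags --libs nuspell icu-uc), where
the pkg-config module names are assumptions and may differ on your system.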