Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package tesseract-ocr for openSUSE:Factory 
checked in at 2023-06-08 21:42:11
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/tesseract-ocr (Old)
 and      /work/SRC/openSUSE:Factory/.tesseract-ocr.new.15902 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "tesseract-ocr"

Thu Jun  8 21:42:11 2023 rev:14 rq:1091719 version:5.3.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/tesseract-ocr/tesseract-ocr.changes      
2023-03-08 14:53:58.315133239 +0100
+++ /work/SRC/openSUSE:Factory/.tesseract-ocr.new.15902/tesseract-ocr.changes   
2023-06-08 21:42:20.057910475 +0200
@@ -1,0 +2,8 @@
+Thu Jun  8 15:10:09 UTC 2023 - Ondřej Súkup <mimi...@gmail.com>
+
+- update to 5.3.1
+- revert back to autoconf build as upstream doesn't support CMake
+   outside Windows
+  * Bugfixes for special case scenarios
+
+-------------------------------------------------------------------

Old:
----
  tesseract-5.3.0.tar.gz

New:
----
  tesseract-5.3.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ tesseract-ocr.spec ++++++
--- /var/tmp/diff_new_pack.5YUgO2/_old  2023-06-08 21:42:20.701914270 +0200
+++ /var/tmp/diff_new_pack.5YUgO2/_new  2023-06-08 21:42:20.713914341 +0200
@@ -16,9 +16,9 @@
 #
 
 
-%define lname  libtesseract-5_3_0
+%define lname  libtesseract5
 Name:           tesseract-ocr
-Version:        5.3.0
+Version:        5.3.1
 Release:        0
 Summary:        Open Source OCR Engine
 License:        Apache-2.0 AND GPL-2.0-or-later
@@ -26,12 +26,17 @@
 Source0:        
https://github.com/tesseract-ocr/tesseract/archive/refs/tags/%{version}.tar.gz#/tesseract-%{version}.tar.gz
 Source99:       baselibs.conf
 BuildRequires:  asciidoc
+BuildRequires:  autoconf
+BuildRequires:  automake
 BuildRequires:  chrpath
-BuildRequires:  cmake
 BuildRequires:  curl-devel
 BuildRequires:  doxygen
 BuildRequires:  fdupes
+%if 0%{?suse_version} > 1550
 BuildRequires:  gcc-c++
+%else
+BuildRequires:  gcc12-c++
+%endif
 BuildRequires:  libtool
 BuildRequires:  libxslt-tools
 BuildRequires:  opencl-headers
@@ -77,20 +82,24 @@
 %autosetup -n tesseract-%{version} -p1
 
 %build
-%cmake -DCMAKE_INSTALL_LIBDIR=%{_lib} -DTESSDATA_PREFIX=%{_datadir}
-%cmake_build
+%if 0%{suse_version} < 1550
+export CC=gcc-12
+export CXX=g++-12
+%endif
+
+autoreconf -fiv
+%configure \
+  --enable-opencl \
+   --disable-static\
+   --with-gnu-ld
 
-chrpath --delete src/training/libpango_training.so
-
-# Manually build manfiles, cmake does not build them
-cd ../doc
-sh generate_manpages.sh
-ls -alh
+%make_build -j1 all training doc
 
 %install
-%cmake_install
-install -D build/src/training/libpango_training.so \
-       %{buildroot}%{_libdir}/libpango_training.so
+%make_install all training-install
+
+rm -f %{buildroot}%{_libdir}/libtesseract.la
+
 mkdir -p %{buildroot}%{_mandir}/{man1,man5}/
 cp -a doc/*.1 %{buildroot}%{_mandir}/man1/
 cp -a doc/*.5 %{buildroot}%{_mandir}/man5/
@@ -106,9 +115,6 @@
 %doc AUTHORS ChangeLog README.md
 %license LICENSE
 %{_bindir}/*
-%{_libdir}/libcommon_training.so
-%{_libdir}/libpango_training.so
-%{_libdir}/libunicharset_training.so
 %dir %{_datadir}/tessdata
 %{_datadir}/tessdata/configs/
 %{_datadir}/tessdata/tessconfigs/
@@ -119,7 +125,6 @@
 %files devel
 %{_includedir}/tesseract
 %{_libdir}/libtesseract.so
-%{_libdir}/cmake/tesseract/
 %{_libdir}/pkgconfig/*.pc
 
 %files -n %{lname}

++++++ baselibs.conf ++++++
--- /var/tmp/diff_new_pack.5YUgO2/_old  2023-06-08 21:42:20.765914648 +0200
+++ /var/tmp/diff_new_pack.5YUgO2/_new  2023-06-08 21:42:20.769914671 +0200
@@ -1,2 +1,2 @@
-libtesseract-5_3_0
+libtesseract5
 

++++++ tesseract-5.3.0.tar.gz -> tesseract-5.3.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/.github/ISSUE_TEMPLATE/issue-bug.yml 
new/tesseract-5.3.1/.github/ISSUE_TEMPLATE/issue-bug.yml
--- old/tesseract-5.3.0/.github/ISSUE_TEMPLATE/issue-bug.yml    1970-01-01 
01:00:00.000000000 +0100
+++ new/tesseract-5.3.1/.github/ISSUE_TEMPLATE/issue-bug.yml    2023-04-01 
21:50:30.000000000 +0200
@@ -0,0 +1,75 @@
+name: Bug Report
+description: File a bug report
+body:
+  - type: markdown
+    attributes:
+      value: |
+        ### Attention
+        Before you submit an issue, please review [the guidelines for this 
repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md).
+
+        Have a question? Need help?
+        Please use [our forum](https://groups.google.com/g/tesseract-ocr).
+
+        Please follow these rules:
+        * Don't open an issue for [Tesseract version which was released more 
than a year ago](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html).
+        * Don't open an issue which involves 3rd party tools that use 
Tesseract as a library. Only report about an issue with the Tesseract command 
line tool or the C/C++ API.
+        * Please provide the input image.
+        * Also provide output files (txt and/or tsv, hocr, pdf). You can make 
a zip archive that will contain these files, so GitHub will let you upload them.
+        * Don't attach a screenshot of the command line and output. Instead, 
copy the text and paste it in your bug report.
+  - type: textarea
+    attributes:
+      label: Current Behavior
+  - type: textarea
+    attributes:
+      label: Expected Behavior
+  - type: textarea
+    attributes:
+      label: Suggested Fix
+  - type: textarea
+    attributes:
+      label: tesseract -v
+      description: Version info, compiled libraries, SIMD, OpenMP
+      placeholder: "Please paste the output of the command: tesseract -v"
+  - type: dropdown
+    id: os-linux
+    attributes:
+      label: Operating System
+      description:  Choose the OS where the bug occurs
+      multiple: true
+      options:
+        - Windows 11
+        - Windows 10
+        - macOS 13 Ventura
+        - macOS 12 Monterey
+        - macOS 11 Big Sur
+        - Ubuntu 22.04 Jammy
+        - Ubuntu 20.04 Focal
+        - Debian Testing Bookworm
+        - Debian 11 Bullseye
+        - RHEL 9
+        - RHEL 8
+  - type: textarea
+    attributes:
+      label: Other Operating System
+      placeholder: Enter the name and version of the OS
+  - type: textarea
+    attributes:
+      label: uname -a
+      placeholder: "Paste the output of the command: uname -a (if available in 
your system)."
+
+  - type: textarea
+    attributes:
+      label: Compiler
+      placeholder: "Enter compiler name and version (Examples: MSVC 2019 
16.11, Clang 13.0.1, GCC 11.2, Xcode 14.1)"
+  - type: textarea
+    attributes:
+      label: CPU
+      placeholder: "Enter your CPU vendor name and model (Examples: Intel Core 
i7-11700K, AMD Ryzen 7 5800X, Apple Silicon M1)"
+  - type: textarea
+    attributes:
+      label: Virtualization / Containers
+      placeholder: "Enter the name and version of the VM / container which you 
use (Examples: Oracle VM VirtualBox 7.0.4,VMware Workstation 17.0, Hyper-V, 
Docker 20.10.22)"
+  - type: textarea
+    attributes:
+      label: Other Information
+      placeholder: Add more details here.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/tesseract-5.3.0/.github/ISSUE_TEMPLATE/issue-feature-request.yml 
new/tesseract-5.3.1/.github/ISSUE_TEMPLATE/issue-feature-request.yml
--- old/tesseract-5.3.0/.github/ISSUE_TEMPLATE/issue-feature-request.yml        
1970-01-01 01:00:00.000000000 +0100
+++ new/tesseract-5.3.1/.github/ISSUE_TEMPLATE/issue-feature-request.yml        
2023-04-01 21:50:30.000000000 +0200
@@ -0,0 +1,7 @@
+name: Feature Request
+description: File a feature request
+body:
+  - type: textarea
+    attributes:
+      label: Your Feature Request
+      description: Please look first at the [open issues labeled as 'feature 
request'](https://github.com/tesseract-ocr/tesseract/labels/feature%20request).
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/.github/workflows/autotools.yml 
new/tesseract-5.3.1/.github/workflows/autotools.yml
--- old/tesseract-5.3.0/.github/workflows/autotools.yml 2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/.github/workflows/autotools.yml 2023-04-01 
21:50:30.000000000 +0200
@@ -13,15 +13,17 @@
       fail-fast: false
       matrix:
         config:
-          - { name: ubuntu-20.04-clang-7-autotools, os: ubuntu-20.04, cxx: 
clang++-7 }
-          - { name: ubuntu-20.04-clang-8-autotools, os: ubuntu-20.04, cxx: 
clang++-8 } #installed
-          - { name: ubuntu-20.04-clang-9-autotools, os: ubuntu-20.04, cxx: 
clang++-9 } #installed
+          - { name: ubuntu-22.04-clang-14-autotools, os: ubuntu-22.04, cxx: 
clang++-14 }
+          - { name: ubuntu-22.04-clang-13-autotools, os: ubuntu-22.04, cxx: 
clang++-13 } #installed
+          - { name: ubuntu-22.04-clang-12-autotools, os: ubuntu-22.04, cxx: 
clang++-12 } #installed
+          - { name: ubuntu-20.04-clang-11-autotools, os: ubuntu-20.04, cxx: 
clang++-11 } #installed
           - { name: ubuntu-20.04-clang-10-autotools, os: ubuntu-20.04, cxx: 
clang++-10 } #installed
 
-          - { name: ubuntu-20.04-gcc-7-autotools, os: ubuntu-20.04, cxx: g++-7 
} #installed
-          - { name: ubuntu-20.04-gcc-8-autotools, os: ubuntu-20.04, cxx: g++-8 
} #installed
-          - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 
} #installed
+          - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: 
g++-12 } #installed
+          - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: 
g++-11 } #installed
           - { name: ubuntu-20.04-gcc-10-autotools, os: ubuntu-20.04, cxx: 
g++-10 } #installed
+          - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 
} #installed
+          - { name: ubuntu-20.04-gcc-8-autotools, os: ubuntu-20.04, cxx: g++-8 
}
 
     steps:
     - uses: actions/checkout@v3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/.github/workflows/cmake-win64.yml 
new/tesseract-5.3.1/.github/workflows/cmake-win64.yml
--- old/tesseract-5.3.0/.github/workflows/cmake-win64.yml       2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/.github/workflows/cmake-win64.yml       2023-04-01 
21:50:30.000000000 +0200
@@ -26,7 +26,7 @@
         run: |
              $git_info=$(git describe --tags HEAD)
              echo "version=${git_info}" >> $env:GITHUB_OUTPUT
-      - name: Setup Instalation Location
+      - name: Setup Installation Location
         run: |
              mkdir ${{env.ILOC}}
       - name: Uninstall Perl
@@ -39,7 +39,6 @@
              git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
              cd zlib-ng
              cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} 
-DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON 
-DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
-             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} 
-DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF 
-DINSTALL_UTILS=OFF
              cmake --build build --config Release --target install
              cd ..
 
@@ -122,4 +121,3 @@
           tesseract --list-langs
           echo "Checking OCR process"
           tesseract test/testing/phototest.tif -
-      
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/.github/workflows/cmake.yml 
new/tesseract-5.3.1/.github/workflows/cmake.yml
--- old/tesseract-5.3.0/.github/workflows/cmake.yml     2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/.github/workflows/cmake.yml     2023-04-01 
21:50:30.000000000 +0200
@@ -15,21 +15,23 @@
       matrix:
         config:
 
-          - { name: macos-11-clang-12-cmake, os: macos-11, cxx: clang++ } # 
default
-          # - { name: macos-11-clang-11-cmake, os: macos-11, cxx: '$(brew 
--prefix llvm)/bin/clang++' }  #installed
-          - { name: macos-11-gcc-9-cmake, os: macos-11, cxx: g++-9 } #installed
-          - { name: macos-11-gcc-10-cmake, os: macos-11, cxx: g++-10 } 
#installed
+          - { name: macos-12-clang-14-cmake, os: macos-12, cxx: clang++ } # 
default
+          - { name: macos-11-clang-13-cmake, os: macos-11, cxx: clang++ } # 
default
+
+          - { name: macos-11-gcc-12-cmake, os: macos-11, cxx: g++-12 } 
#installed
           - { name: macos-11-gcc-11-cmake, os: macos-11, cxx: g++-11 } 
#installed
 
-          - { name: ubuntu-20.04-clang-7-cmake, os: ubuntu-20.04, cxx: 
clang++-7 }
-          - { name: ubuntu-20.04-clang-8-cmake, os: ubuntu-20.04, cxx: 
clang++-8 } #installed
-          - { name: ubuntu-20.04-clang-9-cmake, os: ubuntu-20.04, cxx: 
clang++-9 } #installed
+          - { name: ubuntu-22.04-clang-14-cmake, os: ubuntu-22.04, cxx: 
clang++-14 } #installed
+          - { name: ubuntu-22.04-clang-13-cmake, os: ubuntu-22.04, cxx: 
clang++-13 } #installed
+          - { name: ubuntu-20.04-clang-12-cmake, os: ubuntu-20.04, cxx: 
clang++-12 } #installed
+          - { name: ubuntu-20.04-clang-11-cmake, os: ubuntu-20.04, cxx: 
clang++-11 } #installed
           - { name: ubuntu-20.04-clang-10-cmake, os: ubuntu-20.04, cxx: 
clang++-10 } #installed
 
-          - { name: ubuntu-20.04-gcc-7-cmake, os: ubuntu-20.04, cxx: g++-7 } 
#installed
-          - { name: ubuntu-20.04-gcc-8-cmake, os: ubuntu-20.04, cxx: g++-8 } 
#installed
-          - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } 
#installed
+          - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } 
#installed
+          - { name: ubuntu-22.04-gcc-11-cmake, os: ubuntu-22.04, cxx: g++-11 } 
#installed
           - { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } 
#installed
+          - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } 
#installed
+          - { name: ubuntu-20.04-gcc-8-cmake, os: ubuntu-20.04, cxx: g++-8 }
 
     steps:
       - name: Install compilers on Linux
@@ -52,7 +54,7 @@
         run: |
            brew install autoconf automake
            brew install leptonica
-           brew install libarchive
+           # brew install libarchive
            brew install pango
            brew install icu4c && brew link icu4c
            brew install cabextract
@@ -145,12 +147,14 @@
         run: |
              export 
"PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew 
--prefix)/opt/libarchive/lib/pkgconfig:$(brew 
--prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
              cd test
-             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp 
"-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" 
$(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread 
-std=c++11
+             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp 
"-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" 
$(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++11
              ./basicapitest
         if: runner.os == 'macOS'
 
       - name: Display Compiler Version
         run: |
              ${{ matrix.config.cxx }} --version
-             git log -3 --pretty=format:'%h %ad %s | %an'
+             pwd
+             ls -la
+             # git log -3 --pretty=format:'%h %ad %s | %an'
         if: always()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/.mailmap new/tesseract-5.3.1/.mailmap
--- old/tesseract-5.3.0/.mailmap        2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/.mailmap        2023-04-01 21:50:30.000000000 +0200
@@ -13,7 +13,7 @@
 Ray Smith <r...@google.com> 
<theraysm...@gmail.com@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
 Ray Smith <r...@google.com> <theraysmith@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
 
-Shree Devi Kumar <5095331+shreesh...@users.noreply.github.com> 
+Shree Devi Kumar <5095331+shreesh...@users.noreply.github.com>
 Shree Devi Kumar <5095331+shreesh...@users.noreply.github.com> 
<5095331+Shreeshrii@users.noreply.github.com5095331+shreesh...@users.noreply.github.com>
 
 Stefan Weil <s...@weilnetz.de>
@@ -26,4 +26,4 @@
 Zdenko Podobný <zde...@gmail.com>
 Zdenko Podobný <zde...@gmail.com> <zdenko.podo...@nbazp1.sps>
 Zdenko Podobný <zde...@gmail.com> 
<zde...@gmail.com@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
- Zdenko Podobný <zde...@gmail.com> 
<zdenop@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
+Zdenko Podobný <zde...@gmail.com> 
<zdenop@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/CMakeLists.txt 
new/tesseract-5.3.1/CMakeLists.txt
--- old/tesseract-5.3.0/CMakeLists.txt  2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/CMakeLists.txt  2023-04-01 21:50:30.000000000 +0200
@@ -89,6 +89,10 @@
 option(ENABLE_LTO "Enable link-time optimization" OFF)
 option(FAST_FLOAT "Enable float for LSTM" ON)
 option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF)
+option(ENABLE_NATIVE
+       "Enable optimization for host CPU (could break HW compatibility)" OFF)
+# see
+# 
https://stackoverflow.com/questions/52653025/why-is-march-native-used-so-rarely
 option(BUILD_TRAINING_TOOLS "Build training tools" ON)
 option(BUILD_TESTS "Build tests" OFF)
 option(USE_SYSTEM_ICU "Use system ICU" OFF)
@@ -123,6 +127,9 @@
 include(CheckCXXCompilerFlag)
 
 set(CMAKE_CXX_STANDARD 17)
+if("cxx_std_20" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
+  set(CMAKE_CXX_STANDARD 20)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   # cygwin gnu c++ needs to use -std=gnu++17 instead of -std=c++17
@@ -143,15 +150,18 @@
   message(STATUS "IPO / LTO not supported: <${error}>")
 endif()
 
-check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
-if(COMPILER_SUPPORTS_MARCH_NATIVE)
-  set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -march=native")
-  if(NOT CLANG AND MSVC)
-    # clang-cl does not know this argument
-    set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -mtune=native")
-  endif()
-  set(MARCH_NATIVE_OPT ON)
-endif()
+set(MARCH_NATIVE_OPT OFF)
+if(ENABLE_NATIVE)
+  check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
+  if(COMPILER_SUPPORTS_MARCH_NATIVE)
+    set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -march=native")
+    if(NOT CLANG AND MSVC)
+      # clang-cl does not know this argument
+      set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -mtune=native")
+    endif()
+    set(MARCH_NATIVE_OPT ON)
+  endif(COMPILER_SUPPORTS_MARCH_NATIVE)
+endif(ENABLE_NATIVE)
 
 message(STATUS "CMAKE_SYSTEM_PROCESSOR=<${CMAKE_SYSTEM_PROCESSOR}>")
 
@@ -178,6 +188,10 @@
     set(HAVE_SSE4_1 ON)
     set(SSE4_1_COMPILE_FLAGS "-D__SSE4_1__")
     add_definitions("-DHAVE_SSE4_1")
+
+    set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -openmp:experimental")
+    add_definitions("-DOPENMP_SIMD")
+
     # clang with MSVC compatibility
     if(CLANG)
       set(CMAKE_CXX_FLAGS
@@ -189,7 +203,7 @@
         set(SSE4_1_COMPILE_FLAGS "-msse4.1 ${SSE4_1_COMPILE_FLAGS}")
       endif(HAVE_SSE4_1)
     endif(CLANG)
-  else()  # if not MSVC
+  else() # if not MSVC
     check_cxx_compiler_flag("-mavx" HAVE_AVX)
     if(HAVE_AVX)
       set(AVX_COMPILE_FLAGS "-mavx")
@@ -219,6 +233,12 @@
       set(SSE4_1_COMPILE_FLAGS "-msse4.1")
       add_definitions("-DHAVE_SSE4_1")
     endif()
+
+    check_cxx_compiler_flag("-fopenmp-simd" OPENMP_SIMD)
+    if(OPENMP_SIMD)
+      set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -fopenmp-simd")
+      add_definitions("-DOPENMP_SIMD")
+    endif(OPENMP_SIMD)
   endif(MSVC)
 
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*")
@@ -228,8 +248,6 @@
   set(HAVE_AVX512F FALSE)
   set(HAVE_FMA FALSE)
   set(HAVE_SSE4_1 FALSE)
-
-  add_definitions("-DHAVE_NEON")
   set(HAVE_NEON TRUE)
 
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*")
@@ -239,12 +257,7 @@
   set(HAVE_AVX512F FALSE)
   set(HAVE_FMA FALSE)
   set(HAVE_SSE4_1 FALSE)
-
   check_cxx_compiler_flag("-mfpu=neon" HAVE_NEON)
-  if(HAVE_NEON)
-    set(NEON_COMPILE_FLAGS "-mfpu=neon")
-    add_definitions("-DHAVE_NEON")
-  endif()
 
 else()
 
@@ -257,7 +270,13 @@
 
 endif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
 
-# Compiler specific environments
+if(HAVE_NEON)
+  set(NEON_COMPILE_FLAGS "-mfpu=neon")
+  message(STATUS "LTO build is not supported on arm/RBPi.")
+  set(ENABLE_LTO FALSE)  # enabling LTO causes a fatal error on arm/RBPi
+endif()
+
+# Compiler specific environment
 if(CMAKE_COMPILER_IS_GNUCXX OR MINGW)
   set(CMAKE_CXX_FLAGS_DEBUG
       "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og")
@@ -274,9 +293,9 @@
   # loss of data wd4275 non dll-interface class wd4305 ...truncation from
   # 'double' to 'float'
   set(CMAKE_CXX_FLAGS_RELEASE
-      "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267 /wd4251 /wd4275 
/wd4005")
-  set(CMAKE_CXX_FLAGS_RELEASE
-      "${CMAKE_CXX_FLAGS_RELEASE} /wd4068")
+      "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267 /wd4251 /wd4275 
/wd4005"
+  )
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /wd4068")
   # Don't use /Wall because it generates too many warnings.
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W0 /bigobj")
   # MT flag
@@ -319,20 +338,17 @@
     add_definitions(-D_OPENMP=201107)
   endif()
   if(MSVC)
-    # Note: -openmp:llvm is available for X64 from MSVC 16.9
-    # from MSVC 16.10 Preview 2 there is support also for x86 and arm64
+    # Note: -openmp:llvm is available for X64 from MSVC 16.9 from MSVC 16.10
+    # Preview 2 there is support also for x86 and arm64
     # 
https://devblogs.microsoft.com/cppblog/openmp-updates-and-fixes-for-cpp-in-visual-studio-2019-16-10/
-    if ("${OpenMP_CXX_FLAGS}" STREQUAL "-openmp")
+    if("${OpenMP_CXX_FLAGS}" STREQUAL "-openmp")
       set(OpenMP_CXX_FLAGS "-openmp:llvm")
     endif()
-    # 'simd': requires '-openmp:experimental'
-    set_source_files_properties(src/arch/dotproduct.cpp
-                              PROPERTIES COMPILE_FLAGS "-openmp:experimental")
   endif()
   if(OpenMP_FOUND)
     message(">> OpenMP_FOUND ${OpenMP_FOUND} version: ${OpenMP_CXX_VERSION}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    if (NOT TARGET OpenMP::OpenMP_CXX)
+    if(NOT TARGET OpenMP::OpenMP_CXX)
       add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
     endif()
   endif()
@@ -385,17 +401,15 @@
   include_directories(${Leptonica_INCLUDE_DIRS})
 
   # Check for optional libraries.
-  if(WIN32)
-    find_package(TIFF) # for tesseract
-    if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE)
-      # try PKG_CONFIG to find libtiff if cmake failed
-      pkg_check_modules(TIFF libtiff-4)
-    endif()
-    if(TIFF_FOUND)
-      set(HAVE_TIFFIO_H ON)
-      include_directories(${TIFF_INCLUDE_DIRS})
-    endif(TIFF_FOUND)
-  endif(WIN32)
+  find_package(TIFF) # for tesseract
+  if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE)
+    # try PKG_CONFIG to find libtiff if cmake failed
+    pkg_check_modules(TIFF libtiff-4)
+  endif()
+  if(TIFF_FOUND)
+    set(HAVE_TIFFIO_H ON)
+    include_directories(${TIFF_INCLUDE_DIRS})
+  endif(TIFF_FOUND)
   if(DISABLE_ARCHIVE)
     set(HAVE_LIBARCHIVE OFF)
   else(DISABLE_ARCHIVE)
@@ -424,9 +438,9 @@
   endif(DISABLE_CURL)
 endif()
 
-IF(ENABLE_OPENCL)
+if(ENABLE_OPENCL)
   find_package(OpenCL)
-  if (OpenCL_FOUND)
+  if(OpenCL_FOUND)
     include_directories(${OpenCL_INCLUDE_DIRS})
     message(STATUS "OpenCL_INCLUDE_DIRS: ${OpenCL_INCLUDE_DIRS}")
     message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
@@ -442,8 +456,10 @@
 #
 # 
##############################################################################
 
-if(NOT MSVC)
-  set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -O3 -ffast-math")
+if(MSVC)
+  set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} /fp:fast")
+else()
+  set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -O3 -ffast-math")
 endif()
 
 if(NOT DEFINED CMAKE_INSTALL_LIBDIR)
@@ -518,6 +534,8 @@
                "${HAVE_LIBARCHIVE}")
 message(STATUS "Build with libcurl support [HAVE_LIBCURL]: ${HAVE_LIBCURL}")
 message(STATUS "Enable float for LSTM [FAST_FLOAT]: ${FAST_FLOAT}")
+message(STATUS "Enable optimization for host CPU (could break HW 
compatibility)"
+               " [ENABLE_NATIVE]: ${ENABLE_NATIVE}")
 message(STATUS "Disable disable graphics (ScrollView) [GRAPHICS_DISABLED]: "
                "${GRAPHICS_DISABLED}")
 message(STATUS "Disable the legacy OCR engine [DISABLED_LEGACY_ENGINE]: "
@@ -526,10 +544,13 @@
                "${BUILD_TRAINING_TOOLS}")
 message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}")
 if(ENABLE_OPENCL)
-  message(STATUS "Enable unsupported experimental OpenCL [ENABLE_OPENCL]: 
${USE_OPENCL}")
+  message(
+    STATUS
+      "Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}")
 endif(ENABLE_OPENCL)
 message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
-message(STATUS "Install tesseract configs [INSTALL_CONFIGS]: 
${INSTALL_CONFIGS}")
+message(
+  STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
 message(STATUS "--------------------------------------------------------")
 message(STATUS)
 
@@ -582,7 +603,7 @@
         PARENT_SCOPE)
   endfunction()
 
-set(TESSERACT_SRC_LEGACY
+  set(TESSERACT_SRC_LEGACY
       src/ccmain/adaptions.cpp
       src/ccmain/docqual.cpp
       src/ccmain/equationdetect.cpp
@@ -652,10 +673,10 @@
 list(APPEND arch_files src/arch/dotproduct.cpp src/arch/simddetect.cpp
      src/arch/intsimdmatrix.cpp)
 
-if(MARCH_NATIVE_FLAGS)
+if(DOTPRODUCT_FLAGS)
   set_source_files_properties(src/arch/dotproduct.cpp
-                              PROPERTIES COMPILE_FLAGS ${MARCH_NATIVE_FLAGS})
-endif(MARCH_NATIVE_FLAGS)
+                              PROPERTIES COMPILE_FLAGS ${DOTPRODUCT_FLAGS})
+endif(DOTPRODUCT_FLAGS)
 if(HAVE_AVX)
   list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
   set_source_files_properties(src/arch/dotproductavx.cpp
@@ -723,40 +744,37 @@
     src/api/wordstrboxrenderer.cpp)
 
 set(TESSERACT_CONFIGS
-  tessdata/configs/alto
-  tessdata/configs/ambigs.train
-  tessdata/configs/api_config
-  tessdata/configs/bazaar
-  tessdata/configs/bigram
-  tessdata/configs/box.train
-  tessdata/configs/box.train.stderr
-  tessdata/configs/digits
-  tessdata/configs/get.images
-  tessdata/configs/hocr
-  tessdata/configs/inter
-  tessdata/configs/kannada
-  tessdata/configs/linebox
-  tessdata/configs/logfile
-  tessdata/configs/lstm.train
-  tessdata/configs/lstmbox
-  tessdata/configs/lstmdebug
-  tessdata/configs/makebox
-  tessdata/configs/pdf
-  tessdata/configs/quiet
-  tessdata/configs/rebox
-  tessdata/configs/strokewidth
-  tessdata/configs/tsv
-  tessdata/configs/txt
-  tessdata/configs/unlv
-  tessdata/configs/wordstrbox)
+    tessdata/configs/alto
+    tessdata/configs/ambigs.train
+    tessdata/configs/api_config
+    tessdata/configs/bazaar
+    tessdata/configs/bigram
+    tessdata/configs/box.train
+    tessdata/configs/box.train.stderr
+    tessdata/configs/digits
+    tessdata/configs/get.images
+    tessdata/configs/hocr
+    tessdata/configs/inter
+    tessdata/configs/kannada
+    tessdata/configs/linebox
+    tessdata/configs/logfile
+    tessdata/configs/lstm.train
+    tessdata/configs/lstmbox
+    tessdata/configs/lstmdebug
+    tessdata/configs/makebox
+    tessdata/configs/pdf
+    tessdata/configs/quiet
+    tessdata/configs/rebox
+    tessdata/configs/strokewidth
+    tessdata/configs/tsv
+    tessdata/configs/txt
+    tessdata/configs/unlv
+    tessdata/configs/wordstrbox)
 
 set(TESSERACT_TESSCONFIGS
-  tessdata/tessconfigs/batch
-  tessdata/tessconfigs/batch.nochop
-  tessdata/tessconfigs/matdemo
-  tessdata/tessconfigs/msdemo
-  tessdata/tessconfigs/nobatch
-  tessdata/tessconfigs/segdemo)
+    tessdata/tessconfigs/batch tessdata/tessconfigs/batch.nochop
+    tessdata/tessconfigs/matdemo tessdata/tessconfigs/msdemo
+    tessdata/tessconfigs/nobatch tessdata/tessconfigs/segdemo)
 
 set(LIBTESSFILES ${TESSERACT_SRC} ${arch_files} ${arch_files_opt}
                  ${TESSERACT_HDR})
@@ -765,8 +783,7 @@
 
 add_library(libtesseract ${LIBTESSFILES})
 target_include_directories(
-  libtesseract
-  BEFORE
+  libtesseract BEFORE
   PRIVATE src
   PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
          $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/arch>
@@ -807,19 +824,19 @@
   endif()
 endif(CURL_FOUND)
 
-set_target_properties(libtesseract
-  PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
-set_target_properties(libtesseract
-  PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
-
-if(WIN32)
-  set_target_properties(libtesseract
-     PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
-  set_target_properties(libtesseract
-     PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
-else()
-  set_target_properties(libtesseract PROPERTIES OUTPUT_NAME tesseract)
-endif()
+set_target_properties(
+  libtesseract PROPERTIES VERSION
+                          ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
+set_target_properties(
+  libtesseract PROPERTIES SOVERSION
+                          ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
+
+set_target_properties(
+  libtesseract
+  PROPERTIES
+    OUTPUT_NAME
+    
tesseract$<$<BOOL:${WIN32}>:${VERSION_MAJOR}${VERSION_MINOR}$<$<CONFIG:DEBUG>:d>>
+)
 
 if(SW_BUILD)
   target_link_libraries(libtesseract PUBLIC org.sw.demo.danbloomberg.leptonica
@@ -847,11 +864,13 @@
   target_link_libraries(libtesseract PRIVATE ${OpenMP_LIBRARY})
 endif()
 
-if (ANDROID)
-    add_definitions(-DANDROID)
-    find_package(CpuFeaturesNdkCompat REQUIRED)
-    target_include_directories(libtesseract PRIVATE 
"${CpuFeaturesNdkCompat_DIR}/../../../include/ndk_compat")
-    target_link_libraries     (libtesseract PRIVATE CpuFeatures::ndk_compat)
+if(ANDROID)
+  add_definitions(-DANDROID)
+  find_package(CpuFeaturesNdkCompat REQUIRED)
+  target_include_directories(
+    libtesseract
+    PRIVATE "${CpuFeaturesNdkCompat_DIR}/../../../include/ndk_compat")
+  target_link_libraries(libtesseract PRIVATE CpuFeatures::ndk_compat)
 endif()
 
 # 
##############################################################################
@@ -870,8 +889,9 @@
 
 # 
##############################################################################
 
-if(BUILD_TESTS AND EXISTS
-     ${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt
+if(BUILD_TESTS
+   AND EXISTS
+       
${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt
 )
   add_subdirectory(unittest/third_party/googletest)
 endif()
@@ -884,8 +904,13 @@
 get_target_property(tesseract_VERSION libtesseract VERSION)
 get_target_property(tesseract_OUTPUT_NAME libtesseract OUTPUT_NAME)
 
-configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc
+configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in
                @ONLY)
+# to resolve generator expression in OUTPUT_NAME
+file(
+  GENERATE
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc
+  INPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in)
 
 configure_package_config_file(
   cmake/templates/TesseractConfig.cmake.in
@@ -897,8 +922,10 @@
   VERSION ${PACKAGE_VERSION}
   COMPATIBILITY SameMajorVersion)
 
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+install(
+  FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
+  RENAME tesseract.pc)
 install(TARGETS tesseract DESTINATION bin)
 install(
   TARGETS libtesseract
@@ -930,10 +957,10 @@
   DESTINATION include/tesseract)
 
 if(INSTALL_CONFIGS)
-install(FILES ${TESSERACT_CONFIGS}
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs)
-install(FILES ${TESSERACT_TESSCONFIGS}
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
+  install(FILES ${TESSERACT_CONFIGS}
+          DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs)
+  install(FILES ${TESSERACT_TESSCONFIGS}
+          DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
 endif()
 
 # 
##############################################################################
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/ChangeLog 
new/tesseract-5.3.1/ChangeLog
--- old/tesseract-5.3.0/ChangeLog       2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/ChangeLog       2023-04-01 21:50:30.000000000 +0200
@@ -1,3 +1,11 @@
+2023-04-01 - V5.3.1
+ * Bug fixes for some special scenarios:
+   * Fix issue #4010.
+   * textord: Catch empty rows in block iterator (fixes #4039).
+   * Fix FP division by zero (issue #3995).
+ * Improve documentation and log messages.
+ * Build fixes and improvements (mainly for cmake).
+
 2022-12-22 - V5.3.0
  * Minor updates for documentation and cmake builds.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/Makefile.am 
new/tesseract-5.3.1/Makefile.am
--- old/tesseract-5.3.0/Makefile.am     2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/Makefile.am     2023-04-01 21:50:30.000000000 +0200
@@ -821,7 +821,7 @@
 training_CPPFLAGS += -DDISABLED_LEGACY_ENGINE
 endif
 
-# TODO: training programs can not be linked to shared library created
+# TODO: training programs cannot be linked to shared library created
 # with -fvisibility
 if VISIBILITY
 AM_LDFLAGS += -all-static
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/README.md 
new/tesseract-5.3.1/README.md
--- old/tesseract-5.3.0/README.md       2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/README.md       2023-04-01 21:50:30.000000000 +0200
@@ -26,13 +26,13 @@
 
 This package contains an **OCR engine** - `libtesseract` and a **command line 
program** - `tesseract`.
 
-Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused on 
line recognition, but also still supports the legacy Tesseract OCR engine of 
Tesseract 3 which works by recognizing character patterns. Compatibility with 
Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
+Tesseract 4 adds a new neural net (LSTM) based [OCR 
engine](https://en.wikipedia.org/wiki/Optical_character_recognition) which is 
focused on line recognition, but also still supports the legacy Tesseract OCR 
engine of Tesseract 3 which works by recognizing character patterns. 
Compatibility with Tesseract 3 is enabled by using the Legacy OCR Engine mode 
(--oem 0).
 It also needs 
[traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files 
which support the legacy engine, for example those from the 
[tessdata](https://github.com/tesseract-ocr/tessdata) repository.
 
 Stefan Weil is the current lead developer. Ray Smith was the lead developer 
until 2018. The maintainer is Zdenko Podobny. For a list of contributors see 
[AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
 and GitHub's log of 
[contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
 
-Tesseract has **unicode (UTF-8) support**, and can **recognize more than 100 
languages** "out of the box".
+Tesseract has **unicode (UTF-8) support**, and can **recognize [more than 100 
languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)**
 "out of the box".
 
 Tesseract supports **[various image 
formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, 
JPEG and TIFF.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/VERSION new/tesseract-5.3.1/VERSION
--- old/tesseract-5.3.0/VERSION 2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/VERSION 2023-04-01 21:50:30.000000000 +0200
@@ -1 +1 @@
-5.3.0
+5.3.1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/cmake/Configure.cmake 
new/tesseract-5.3.1/cmake/Configure.cmake
--- old/tesseract-5.3.0/cmake/Configure.cmake   2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/cmake/Configure.cmake   2023-04-01 21:50:30.000000000 
+0200
@@ -118,6 +118,7 @@
 #cmakedefine FAST_FLOAT ${FAST_FLOAT}
 #cmakedefine DISABLED_LEGACY_ENGINE ${DISABLED_LEGACY_ENGINE}
 #cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H}
+#cmakedefine HAVE_NEON ${HAVE_NEON}
 #cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
 #cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
 #cmakedefine USE_OPENCL ${USE_OPENCL}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/configure.ac 
new/tesseract-5.3.1/configure.ac
--- old/tesseract-5.3.0/configure.ac    2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/configure.ac    2023-04-01 21:50:30.000000000 +0200
@@ -28,8 +28,8 @@
 
 # Define date of package, etc. Could be useful in auto-generated
 # documentation.
-PACKAGE_YEAR=2022
-PACKAGE_DATE="12/22"
+PACKAGE_YEAR=2023
+PACKAGE_DATE="04/01"
 
 abs_top_srcdir=`AS_DIRNAME([$0])`
 
@@ -640,7 +640,7 @@
    echo "$ sudo make training-install"
    echo ""],
   [
-   echo "You can not build training tools because of missing dependency."
+   echo "You cannot build training tools because of missing dependency."
    echo "Check configure output for details."
    echo ""]
 )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/include/tesseract/ocrclass.h 
new/tesseract-5.3.1/include/tesseract/ocrclass.h
--- old/tesseract-5.3.0/include/tesseract/ocrclass.h    2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/include/tesseract/ocrclass.h    2023-04-01 
21:50:30.000000000 +0200
@@ -61,7 +61,7 @@
   // is UTF8 which means that ASCII characters will come out as one structure
   // but other characters will be returned in two or more instances of this
   // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
+  // the same bounding box. Programs which want to handle languages with
   // different characters sets will need to handle extended characters
   // appropriately, but *all* code needs to be prepared to receive UTF8 coded
   // characters for characters such as bullet and fancy quotes.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccmain/pagesegmain.cpp 
new/tesseract-5.3.1/src/ccmain/pagesegmain.cpp
--- old/tesseract-5.3.0/src/ccmain/pagesegmain.cpp      2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccmain/pagesegmain.cpp      2023-04-01 
21:50:30.000000000 +0200
@@ -332,11 +332,11 @@
 
     finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
 
-#ifndef DISABLED_LEGACY_ENGINE
-
+  #ifndef DISABLED_LEGACY_ENGINE
     if (equ_detect_) {
       equ_detect_->LabelSpecialText(to_block);
     }
+  #endif
 
     BLOBNBOX_CLIST osd_blobs;
     // osd_orientation is the number of 90 degree rotations to make the
@@ -350,6 +350,8 @@
       vertical_text = 
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, to_block,
                                                       &osd_blobs);
     }
+
+  #ifndef DISABLED_LEGACY_ENGINE
     if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != 
nullptr) {
       std::vector<int> osd_scripts;
       if (osd_tess != this) {
@@ -400,10 +402,10 @@
         }
       }
     }
+  #endif // ndef DISABLED_LEGACY_ENGINE
+
     osd_blobs.shallow_clear();
     finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
-
-#endif // ndef DISABLED_LEGACY_ENGINE
   }
 
   return finder;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccmain/paragraphs.cpp 
new/tesseract-5.3.1/src/ccmain/paragraphs.cpp
--- old/tesseract-5.3.0/src/ccmain/paragraphs.cpp       2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccmain/paragraphs.cpp       2023-04-01 
21:50:30.000000000 +0200
@@ -514,8 +514,12 @@
 
 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
                                           std::vector<std::string> &dbg) const 
{
-  char s[30];
-  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, 
rmargin_);
+  char s[60];
+  // The largest (positive and negative) numbers are reported for lindent & 
rindent.
+  // While the column header has widths 5,4,4,5, it is therefore opportune to 
slightly
+  // offset the widths in the format string here to allow ample space for 
lindent & rindent
+  // while keeping the final table output nicely readable: 4,5,5,4.
+  snprintf(s, sizeof(s), "[%4d,%5d;%5d,%4d]", lmargin_, lindent_, rindent_, 
rmargin_);
   dbg.emplace_back(s);
   std::string model_string;
   model_string += static_cast<char>(GetLineType());
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccmain/paramsd.cpp 
new/tesseract-5.3.1/src/ccmain/paramsd.cpp
--- old/tesseract-5.3.0/src/ccmain/paramsd.cpp  2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/ccmain/paramsd.cpp  2023-04-01 21:50:30.000000000 
+0200
@@ -99,7 +99,7 @@
                                  char *t        // target string
 ) {
   int full_length = strlen(s);
-  int reqd_len = 0; // No. of chars requird
+  int reqd_len = 0; // No. of chars required
   const char *next_word = s;
 
   while ((n > 0) && reqd_len < full_length) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccmain/resultiterator.cpp 
new/tesseract-5.3.1/src/ccmain/resultiterator.cpp
--- old/tesseract-5.3.0/src/ccmain/resultiterator.cpp   2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccmain/resultiterator.cpp   2023-04-01 
21:50:30.000000000 +0200
@@ -149,7 +149,7 @@
   for (int i = 0; i < word_length_; i++) {
     letter_types.push_back(it_->word()->SymbolDirection(i));
   }
-  // Convert a single separtor sandwiched between two EN's into an EN.
+  // Convert a single separator sandwiched between two ENs into an EN.
   for (int i = 0; i + 2 < word_length_; i++) {
     if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
         (letter_types[i + 1] == U_EURO_NUM_SEP || letter_types[i + 1] == 
U_COMMON_NUM_SEP)) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccstruct/boxread.cpp 
new/tesseract-5.3.1/src/ccstruct/boxread.cpp
--- old/tesseract-5.3.0/src/ccstruct/boxread.cpp        2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccstruct/boxread.cpp        2023-04-01 
21:50:30.000000000 +0200
@@ -78,7 +78,7 @@
                   std::vector<int> *pages) {
   std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | 
std::ios::binary);
   if (input.fail()) {
-    tprintf("Can not read box data from '%s'.\n", 
BoxFileName(filename).c_str());
+    tprintf("Cannot read box data from '%s'.\n", 
BoxFileName(filename).c_str());
     tprintf("Does it exists?\n");
     return false;
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccstruct/ocrblock.cpp 
new/tesseract-5.3.1/src/ccstruct/ocrblock.cpp
--- old/tesseract-5.3.0/src/ccstruct/ocrblock.cpp       2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccstruct/ocrblock.cpp       2023-04-01 
21:50:30.000000000 +0200
@@ -312,7 +312,7 @@
 //  |  You  see this|    | |_| \|_| |   |rectangular      |
 //  |text is  flowed|    |      }   |   |boundary     that|
 //  |around  a  mid-|     \   ____  |   |forms the  ideal-|
-//  |cloumn portrait._____ \       /  __|ized  text margin|
+//  |column portrait._____ \       /  __|ized  text margin|
 //  |  Polyblobs     exist| \    /   |from which we should|
 //  |to account for insets|  |   |   |measure    paragraph|
 //  |which make  otherwise|  -----   |indentation.        |
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccstruct/ratngs.cpp 
new/tesseract-5.3.1/src/ccstruct/ratngs.cpp
--- old/tesseract-5.3.0/src/ccstruct/ratngs.cpp 2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/ccstruct/ratngs.cpp 2023-04-01 21:50:30.000000000 
+0200
@@ -524,7 +524,7 @@
 // bounding boxes, *this to get the unichars, and this->unicharset
 // to get the target positions. If small_caps is true, sub/super are not
 // considered, but dropcaps are.
-// NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.)
+// NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
 void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
   // Initialize to normal.
   for (unsigned i = 0; i < length_; ++i) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccstruct/ratngs.h 
new/tesseract-5.3.1/src/ccstruct/ratngs.h
--- old/tesseract-5.3.0/src/ccstruct/ratngs.h   2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/ccstruct/ratngs.h   2023-04-01 21:50:30.000000000 
+0200
@@ -539,7 +539,7 @@
   // bounding boxes, *this to get the unichars, and this->unicharset
   // to get the target positions. If small_caps is true, sub/super are not
   // considered, but dropcaps are.
-  // NOTE: blobs_list should be the chopped_word blobs. (Fully segemented.)
+  // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
   void SetScriptPositions(bool small_caps, TWERD *word, int debug = 0);
   // Sets all the script_pos_ positions to the given position.
   void SetAllScriptPositions(ScriptPos position);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccstruct/stepblob.cpp 
new/tesseract-5.3.1/src/ccstruct/stepblob.cpp
--- old/tesseract-5.3.0/src/ccstruct/stepblob.cpp       2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccstruct/stepblob.cpp       2023-04-01 
21:50:30.000000000 +0200
@@ -314,7 +314,7 @@
  * C_BLOB::count_transitions
  *
  * Return the total x and y maxes and mins in the blob.
- * Chlid outlines are not counted.
+ * Child outlines are not counted.
  **********************************************************************/
 
 int32_t C_BLOB::count_transitions( // area
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccutil/params.h 
new/tesseract-5.3.1/src/ccutil/params.h
--- old/tesseract-5.3.0/src/ccutil/params.h     2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/ccutil/params.h     2023-04-01 21:50:30.000000000 
+0200
@@ -21,6 +21,7 @@
 
 #include <tesseract/export.h> // for TESS_API
 
+#include <cstdint>
 #include <cstdio>
 #include <cstring>
 #include <string>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccutil/unicharmap.h 
new/tesseract-5.3.1/src/ccutil/unicharmap.h
--- old/tesseract-5.3.0/src/ccutil/unicharmap.h 2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/ccutil/unicharmap.h 2023-04-01 21:50:30.000000000 
+0200
@@ -33,7 +33,7 @@
 
   ~UNICHARMAP();
 
-  // Insert the given unichar represention in the UNICHARMAP and associate it
+  // Insert the given unichar representation in the UNICHARMAP and associate it
   // with the given id. The length of the representation MUST be non-zero.
   void insert(const char *const unichar_repr, UNICHAR_ID id);
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/ccutil/unicharset.cpp 
new/tesseract-5.3.1/src/ccutil/unicharset.cpp
--- old/tesseract-5.3.0/src/ccutil/unicharset.cpp       2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/ccutil/unicharset.cpp       2023-04-01 
21:50:30.000000000 +0200
@@ -1104,7 +1104,7 @@
   const char *ptr = string;
   int len = strlen(string);
   if (len < kMinLen || *ptr != kSeparator) {
-    return nullptr; // this string can not represent a fragment
+    return nullptr; // this string cannot represent a fragment
   }
   ptr++; // move to the next character
   int step = 0;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/classify/mfoutline.cpp 
new/tesseract-5.3.1/src/classify/mfoutline.cpp
--- old/tesseract-5.3.0/src/classify/mfoutline.cpp      2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/classify/mfoutline.cpp      2023-04-01 
21:50:30.000000000 +0200
@@ -19,7 +19,7 @@
 
 #include "blobs.h"
 #include "classify.h"
-#include "clusttool.h" //If remove you get cought in a loop somewhere
+#include "clusttool.h" //If remove you get caught in a loop somewhere
 #include "mfx.h"
 #include "params.h"
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/dict/dawg.h 
new/tesseract-5.3.1/src/dict/dawg.h
--- old/tesseract-5.3.0/src/dict/dawg.h 2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/dict/dawg.h 2023-04-01 21:50:30.000000000 +0200
@@ -112,7 +112,7 @@
   /// Magic number to determine endianness when reading the Dawg from file.
   static const int16_t kDawgMagicNumber = 42;
   /// A special unichar id that indicates that any appropriate pattern
-  /// (e.g.dicitonary word, 0-9 digit, etc) can be inserted instead
+  /// (e.g.dictionary word, 0-9 digit, etc) can be inserted instead
   /// Used for expressing patterns in punctuation and number Dawgs.
   static const UNICHAR_ID kPatternUnicharID = 0;
 
@@ -400,7 +400,7 @@
 //
 /// Concrete class that can operate on a compacted (squished) Dawg (read,
 /// search and write to file). This class is read-only in the sense that
-/// new words can not be added to an instance of SquishedDawg.
+/// new words cannot be added to an instance of SquishedDawg.
 /// The underlying representation of the nodes and edges in SquishedDawg
 /// is stored as a contiguous EDGE_ARRAY (read from file or given as an
 /// argument to the constructor).
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/dict/dict.cpp 
new/tesseract-5.3.1/src/dict/dict.cpp
--- old/tesseract-5.3.0/src/dict/dict.cpp       2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/dict/dict.cpp       2023-04-01 21:50:30.000000000 
+0200
@@ -114,7 +114,7 @@
                     " for each dict char above small word size.",
                     getCCUtil()->params())
     , double_MEMBER(stopper_allowable_character_badness, 3.0,
-                    "Max certaintly variation allowed in a word (in sigma)", 
getCCUtil()->params())
+                    "Max certainty variation allowed in a word (in sigma)", 
getCCUtil()->params())
     , INT_MEMBER(stopper_debug_level, 0, "Stopper debug level", 
getCCUtil()->params())
     , BOOL_MEMBER(stopper_no_acceptable_choices, false,
                   "Make AcceptableChoice() always return false. Useful"
@@ -171,7 +171,7 @@
 
 DawgCache *Dict::GlobalDawgCache() {
   // This global cache (a singleton) will outlive every Tesseract instance
-  // (even those that someone else might declare as global statics).
+  // (even those that someone else might declare as global static variables).
   static DawgCache cache;
   return &cache;
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/dict/permdawg.cpp 
new/tesseract-5.3.1/src/dict/permdawg.cpp
--- old/tesseract-5.3.0/src/dict/permdawg.cpp   2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/dict/permdawg.cpp   2023-04-01 21:50:30.000000000 
+0200
@@ -357,7 +357,7 @@
   }
   if (word_ending && char_frag_info->fragment) {
     if (debug) {
-      tprintf("Word can not end with a fragment\n");
+      tprintf("Word cannot end with a fragment\n");
     }
     return false;
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/lstm/recodebeam.h 
new/tesseract-5.3.1/src/lstm/recodebeam.h
--- old/tesseract-5.3.0/src/lstm/recodebeam.h   2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/lstm/recodebeam.h   2023-04-01 21:50:30.000000000 
+0200
@@ -211,8 +211,8 @@
   // Generates debug output of the content of the beams after a Decode.
   void DebugBeams(const UNICHARSET &unicharset) const;
 
-  // Extract the best charakters from the current decode iteration and block
-  // those symbols for the next iteration. In contrast to tesseracts standard
+  // Extract the best characters from the current decode iteration and block
+  // those symbols for the next iteration. In contrast to Tesseract's standard
   // method to chose the best overall node chain, this methods looks at a short
   // node chain segmented by the character boundaries and chooses the best
   // option independent of the remaining node chain.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/textord/makerow.cpp 
new/tesseract-5.3.1/src/textord/makerow.cpp
--- old/tesseract-5.3.0/src/textord/makerow.cpp 2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/textord/makerow.cpp 2023-04-01 21:50:30.000000000 
+0200
@@ -1336,7 +1336,7 @@
     // Try to search for two modes in row_cap_heights that could
     // be the xheight and the capheight (e.g. some of the rows
     // were lowercase, but did not have enough (a/de)scenders.
-    // If such two modes can not be found, this block is most
+    // If such two modes cannot be found, this block is most
     // likely all caps (or all small caps, in which case the code
     // still works as intended).
     compute_xheight_from_modes(
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/textord/tablerecog.cpp 
new/tesseract-5.3.1/src/textord/tablerecog.cpp
--- old/tesseract-5.3.0/src/textord/tablerecog.cpp      2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/textord/tablerecog.cpp      2023-04-01 
21:50:30.000000000 +0200
@@ -365,7 +365,7 @@
 
 // TODO(nbeato): Could be much better than this.
 // Examples:
-//   - Caclulate the percentage of filled cells.
+//   - Calculate the percentage of filled cells.
 //   - Calculate the average number of ColPartitions per cell.
 //   - Calculate the number of cells per row with partitions.
 //   - Check if ColPartitions in adjacent cells are similar.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/textord/topitch.cpp 
new/tesseract-5.3.1/src/textord/topitch.cpp
--- old/tesseract-5.3.0/src/textord/topitch.cpp 2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/textord/topitch.cpp 2023-04-01 21:50:30.000000000 
+0200
@@ -398,9 +398,8 @@
   int16_t mid_cuts; // no of cheap cuts
   float pitch_sd;   // sync rating
 
-  if (block_it.empty()
-      //      || block_it.data()==block_it.data_relative(1)
-      || !textord_blockndoc_fixed) {
+  if (!textord_blockndoc_fixed ||
+      block_it.empty() || block_it.data()->get_rows()->empty()) {
     return false;
   }
   shift_factor = gradient / (gradient * gradient + 1);
@@ -1105,7 +1104,7 @@
     float &best_sp_sd,          // space sd
     int16_t &best_mid_cuts,     // no of cheap cuts
     ICOORDELT_LIST *best_cells, // row cells
-    bool testing_on             // inidividual words
+    bool testing_on             // individual words
 ) {
   int pitch_delta;           // offset pitch
   int16_t mid_cuts;          // cheap cuts
@@ -1204,7 +1203,7 @@
     float &best_sp_sd,          // space sd
     int16_t &best_mid_cuts,     // no of cheap cuts
     ICOORDELT_LIST *best_cells, // row cells
-    bool testing_on             // inidividual words
+    bool testing_on             // individual words
 ) {
   int pitch_delta;    // offset pitch
   int16_t pixel;      // pixel coord
@@ -1297,7 +1296,7 @@
     float &sp_sd,              // space sd
     int16_t &mid_cuts,         // no of free cuts
     ICOORDELT_LIST *row_cells, // list of chop pts
-    bool testing_on,           // inidividual words
+    bool testing_on,           // individual words
     int16_t start,             // start of good range
     int16_t end                // end of good range
 ) {
@@ -1453,7 +1452,7 @@
     int16_t &occupation,       // no of occupied cells
     int16_t &mid_cuts,         // no of free cuts
     ICOORDELT_LIST *row_cells, // list of chop pts
-    bool testing_on,           // inidividual words
+    bool testing_on,           // individual words
     int16_t start,             // start of good range
     int16_t end                // end of good range
 ) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/textord/topitch.h 
new/tesseract-5.3.1/src/textord/topitch.h
--- old/tesseract-5.3.0/src/textord/topitch.h   2022-12-22 14:57:57.000000000 
+0100
+++ new/tesseract-5.3.1/src/textord/topitch.h   2023-04-01 21:50:30.000000000 
+0200
@@ -117,7 +117,7 @@
     float &best_sp_sd,          // space sd
     int16_t &best_mid_cuts,     // no of cheap cuts
     ICOORDELT_LIST *best_cells, // row cells
-    bool testing_on             // inidividual words
+    bool testing_on             // individual words
 );
 float tune_row_pitch2(          // find fp cells
     TO_ROW *row,                // row to do
@@ -129,7 +129,7 @@
     float &best_sp_sd,          // space sd
     int16_t &best_mid_cuts,     // no of cheap cuts
     ICOORDELT_LIST *best_cells, // row cells
-    bool testing_on             // inidividual words
+    bool testing_on             // individual words
 );
 float compute_pitch_sd(        // find fp cells
     TO_ROW *row,               // row to do
@@ -141,7 +141,7 @@
     float &sp_sd,              // space sd
     int16_t &mid_cuts,         // no of free cuts
     ICOORDELT_LIST *row_cells, // list of chop pts
-    bool testing_on,           // inidividual words
+    bool testing_on,           // individual words
     int16_t start = 0,         // start of good range
     int16_t end = 0            // end of good range
 );
@@ -154,7 +154,7 @@
     int16_t &occupation,       // no of occupied cells
     int16_t &mid_cuts,         // no of free cuts
     ICOORDELT_LIST *row_cells, // list of chop pts
-    bool testing_on,           // inidividual words
+    bool testing_on,           // individual words
     int16_t start = 0,         // start of good range
     int16_t end = 0            // end of good range
 );
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.3.0/src/training/CMakeLists.txt 
new/tesseract-5.3.1/src/training/CMakeLists.txt
--- old/tesseract-5.3.0/src/training/CMakeLists.txt     2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/training/CMakeLists.txt     2023-04-01 
21:50:30.000000000 +0200
@@ -259,12 +259,10 @@
     target_link_libraries(unicharset_training
                           PUBLIC common_training org.sw.demo.unicode.icu.i18n)
   else()
-    if(${CMAKE_VERSION} VERSION_LESS "3.12.0")
-      target_link_libraries(unicharset_training PUBLIC common_training
-                                                       PkgConfig::ICU)
+    if(PKG_CONFIG_FOUND)
+      target_link_libraries(unicharset_training PUBLIC common_training 
PkgConfig::ICU)
     else()
-      target_link_libraries(unicharset_training PUBLIC common_training
-                                                       ${ICU_LIBRARIES})
+      target_link_libraries(unicharset_training PUBLIC common_training 
${ICU_LIBRARIES})
     endif()
   endif()
   target_include_directories(unicharset_training
@@ -348,6 +346,7 @@
   # 
############################################################################
 
   add_executable(unicharset_extractor unicharset_extractor.cpp)
+  set_property(TARGET unicharset_extractor PROPERTY CXX_STANDARD 17)
   target_link_libraries(unicharset_extractor unicharset_training)
   project_group(unicharset_extractor "Training Tools")
   install(
@@ -388,11 +387,7 @@
         target_include_directories(pango_training BEFORE
                                    PUBLIC ${PANGO_INCLUDE_DIRS})
         target_compile_definitions(pango_training PUBLIC -DPANGO_ENABLE_ENGINE)
-        if(${CMAKE_VERSION} VERSION_LESS "3.12.0")
-          target_link_libraries(pango_training PUBLIC PkgConfig::PANGO)
-        else()
-          target_link_libraries(pango_training PUBLIC ${PANGO_LINK_LIBRARIES})
-        endif()
+        target_link_libraries(pango_training PUBLIC PkgConfig::PANGO)
       endif()
     endif()
     target_include_directories(pango_training
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/tesseract-5.3.0/src/training/pango/pango_font_info.cpp 
new/tesseract-5.3.1/src/training/pango/pango_font_info.cpp
--- old/tesseract-5.3.0/src/training/pango/pango_font_info.cpp  2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/training/pango/pango_font_info.cpp  2023-04-01 
21:50:30.000000000 +0200
@@ -725,7 +725,7 @@
              CAIRO_FONT_TYPE_USER) {
     printf("Using CAIRO_FONT_TYPE_USER.\n");
   } else if (!font_map) {
-    printf("Can not create pango cairo font map!\n");
+    printf("Cannot create pango cairo font map!\n");
   }
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/tesseract-5.3.0/src/training/unicharset/lang_model_helpers.cpp 
new/tesseract-5.3.1/src/training/unicharset/lang_model_helpers.cpp
--- old/tesseract-5.3.0/src/training/unicharset/lang_model_helpers.cpp  
2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/training/unicharset/lang_model_helpers.cpp  
2023-04-01 21:50:30.000000000 +0200
@@ -239,6 +239,7 @@
     tprintf("Error writing output traineddata file!!\n");
     return EXIT_FAILURE;
   }
+  tprintf("Created %s/%s/%s.traineddata", output_dir.c_str(), lang.c_str(), 
lang.c_str());
   return EXIT_SUCCESS;
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/tesseract-5.3.0/src/training/unicharset_extractor.cpp 
new/tesseract-5.3.1/src/training/unicharset_extractor.cpp
--- old/tesseract-5.3.0/src/training/unicharset_extractor.cpp   2022-12-22 
14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/training/unicharset_extractor.cpp   2023-04-01 
21:50:30.000000000 +0200
@@ -21,6 +21,7 @@
 // a unicharset.
 
 #include <cstdlib>
+#include <filesystem>
 #include "boxread.h"
 #include "commandlineflags.h"
 #include "commontraining.h" // CheckSharedLibraryVersion
@@ -64,15 +65,21 @@
   UNICHARSET unicharset;
   // Load input files
   for (int arg = 1; arg < argc; ++arg) {
+    std::filesystem::path filePath = argv[arg];
     std::string file_data = tesseract::ReadFile(argv[arg]);
     if (file_data.empty()) {
       continue;
     }
     std::vector<std::string> texts;
-    if (ReadMemBoxes(-1, /*skip_blanks*/ true, &file_data[0],
-                     /*continue_on_failure*/ false, /*boxes*/ nullptr, &texts,
-                     /*box_texts*/ nullptr, /*pages*/ nullptr)) {
+    if (filePath.extension() == ".box") {
       tprintf("Extracting unicharset from box file %s\n", argv[arg]);
+      bool res = ReadMemBoxes(-1, /*skip_blanks*/ true, &file_data[0],
+                   /*continue_on_failure*/ false, /*boxes*/ nullptr, &texts,
+                   /*box_texts*/ nullptr, /*pages*/ nullptr);
+      if (!res) {
+        tprintf("Cannot read box data from '%s'\n", argv[arg]);
+        return EXIT_FAILURE;
+      }
     } else {
       tprintf("Extracting unicharset from plain text file %s\n", argv[arg]);
       texts.clear();
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.3.0/src/wordrec/language_model.cpp new/tesseract-5.3.1/src/wordrec/language_model.cpp
--- old/tesseract-5.3.0/src/wordrec/language_model.cpp  2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/wordrec/language_model.cpp  2023-04-01 21:50:30.000000000 +0200
@@ -1010,7 +1010,7 @@
   assert(len != 0);
   // The ideal situation would be to have the classifier scores for
   // classifying each position as each of the characters in the unicharset.
-  // Since we can not do this because of speed, we add a very crude estimate
+  // Since we cannot do this because of speed, we add a very crude estimate
   // of what these scores for the "missing" classifications would sum up to.
   denom +=
       (dict_->getUnicharset().size() - len) * CertaintyScore(language_model_ngram_nonmatch_score);
@@ -1375,7 +1375,12 @@
   // features[PTRAIN_NUM_BAD_FONT] = vse.consistency_info.inconsistent_font;
 
   // Classifier-related features.
-  features[PTRAIN_RATING_PER_CHAR] = vse.ratings_sum / static_cast<float>(vse.outline_length);
+  if (vse.outline_length > 0.0f) {
+    features[PTRAIN_RATING_PER_CHAR] = vse.ratings_sum / vse.outline_length;
+  } else {
+    // Avoid FP division by 0.
+    features[PTRAIN_RATING_PER_CHAR] = 0.0f;
+  }
 }
 
 WERD_CHOICE *LanguageModel::ConstructWord(ViterbiStateEntry *vse, WERD_RES *word_res,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.3.0/src/wordrec/language_model.h new/tesseract-5.3.1/src/wordrec/language_model.h
--- old/tesseract-5.3.0/src/wordrec/language_model.h    2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/wordrec/language_model.h    2023-04-01 21:50:30.000000000 +0200
@@ -267,7 +267,7 @@
   // could be pruned out (i.e. is neither a system/user/frequent dictionary
   // nor a top choice path).
   // In non-space delimited languages all paths can be "somewhat" dictionary
-  // words. In such languages we can not do dictionary-driven path pruning,
+  // words. In such languages we cannot do dictionary-driven path pruning,
   // so paths with non-empty dawg_info are considered prunable.
   inline bool PrunablePath(const ViterbiStateEntry &vse) {
     if (vse.top_choice_flags) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.3.0/src/wordrec/segsearch.cpp new/tesseract-5.3.1/src/wordrec/segsearch.cpp
--- old/tesseract-5.3.0/src/wordrec/segsearch.cpp       2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/src/wordrec/segsearch.cpp       2023-04-01 21:50:30.000000000 +0200
@@ -242,7 +242,7 @@
   if (lst == nullptr) {
     ratings->put(pain_point.col, pain_point.row, classified);
   } else {
-    // We can not delete old BLOB_CHOICEs, since they might contain
+    // We cannot delete old BLOB_CHOICEs, since they might contain
     // ViterbiStateEntries that are parents of other "active" entries.
     // Thus if the matrix cell already contains classifications we add
     // the new ones to the beginning of the list.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.3.0/tesseract.pc.cmake new/tesseract-5.3.1/tesseract.pc.cmake
--- old/tesseract-5.3.0/tesseract.pc.cmake      2022-12-22 14:57:57.000000000 +0100
+++ new/tesseract-5.3.1/tesseract.pc.cmake      2023-04-01 21:50:30.000000000 +0200
@@ -1,6 +1,6 @@
 prefix=@CMAKE_INSTALL_PREFIX@
 exec_prefix=${prefix}/bin
-libdir=${prefix}/lib
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
 includedir=${prefix}/include
 
 Name: @tesseract_NAME@

Reply via email to