Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package tesseract-ocr for openSUSE:Factory checked in at 2024-07-16 22:04:48 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/tesseract-ocr (Old) and /work/SRC/openSUSE:Factory/.tesseract-ocr.new.17339 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "tesseract-ocr" Tue Jul 16 22:04:48 2024 rev:18 rq:1187878 version:5.4.1 Changes: -------- --- /work/SRC/openSUSE:Factory/tesseract-ocr/tesseract-ocr.changes 2024-06-07 15:05:59.629514938 +0200 +++ /work/SRC/openSUSE:Factory/.tesseract-ocr.new.17339/tesseract-ocr.changes 2024-07-16 22:05:14.664043756 +0200 @@ -1,0 +2,11 @@ +Tue Jul 2 17:03:59 UTC 2024 - ecsos <ec...@opensuse.org> + +- Update to version 5.4.1: + - Avoid FP overflow in NormEvidenceOf (fixes issue #4257) in #4259 + - Update deprecated Node.js 16 GitHub actions in #4262 + - Fix code style issues which were reported in #4263 + - Fix some issues which were reported in #4266 + - Fix more Codacy issues in #4267 + - Several build fixes + +------------------------------------------------------------------- Old: ---- tesseract-5.4.0.tar.gz New: ---- tesseract-5.4.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ tesseract-ocr.spec ++++++ --- /var/tmp/diff_new_pack.IxcDuN/_old 2024-07-16 22:05:15.432071786 +0200 +++ /var/tmp/diff_new_pack.IxcDuN/_new 2024-07-16 22:05:15.432071786 +0200 @@ -18,7 +18,7 @@ %define lname libtesseract5 Name: tesseract-ocr -Version: 5.4.0 +Version: 5.4.1 Release: 0 Summary: Open Source OCR Engine License: Apache-2.0 AND GPL-2.0-or-later ++++++ tesseract-5.4.0.tar.gz -> tesseract-5.4.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/ISSUE_TEMPLATE.md new/tesseract-5.4.1/.github/ISSUE_TEMPLATE.md --- old/tesseract-5.4.0/.github/ISSUE_TEMPLATE.md 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/ISSUE_TEMPLATE.md 1970-01-01 01:00:00.000000000 +0100 @@ -1,23 +0,0 @@ -Before you submit an issue, please review [the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md). - -Please report an issue only for a BUG, not for asking questions. 
- -Note that it will be much easier for us to fix the issue if a test case that -reproduces the problem is provided. Ideally this test case should not have any -external dependencies. Provide a copy of the image or link to files for the test case. - -Please delete this text and fill in the template below. - ------------------------- - -### Environment - -* **Tesseract Version**: <!-- compulsory. you must provide your version --> -* **Commit Number**: <!-- optional. if known - specify commit used, if built from source --> -* **Platform**: <!-- either `uname -a` output, or if Windows, version and 32-bit or 64-bit --> - -### Current Behavior: - -### Expected Behavior: - -### Suggested Fix: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/autotools-macos.yml new/tesseract-5.4.1/.github/workflows/autotools-macos.yml --- old/tesseract-5.4.0/.github/workflows/autotools-macos.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/autotools-macos.yml 2024-06-11 20:18:21.000000000 +0200 @@ -93,7 +93,7 @@ run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test - ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++11 -framework accelerate + ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework accelerate ./basicapitest - name: Display Compiler Version @@ -193,7 +193,7 @@ run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test - ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++11 -framework Accelerate + ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib $(pkg-config --cflags --libs tesseract lept) -pthread -std=c++17 -framework 
Accelerate ./basicapitest - name: Display Compiler Version diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/autotools.yml new/tesseract-5.4.1/.github/workflows/autotools.yml --- old/tesseract-5.4.0/.github/workflows/autotools.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/autotools.yml 2024-06-11 20:18:21.000000000 +0200 @@ -92,7 +92,7 @@ run: | export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig" cd test - ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11 + ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++17 ./basicapitest - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/cifuzz.yml new/tesseract-5.4.1/.github/workflows/cifuzz.yml --- old/tesseract-5.4.0/.github/workflows/cifuzz.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/cifuzz.yml 2024-06-11 20:18:21.000000000 +0200 @@ -26,7 +26,7 @@ fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/cmake-win64.yml new/tesseract-5.4.1/.github/workflows/cmake-win64.yml --- old/tesseract-5.4.0/.github/workflows/cmake-win64.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/cmake-win64.yml 2024-06-11 20:18:21.000000000 +0200 @@ -9,6 +9,7 @@ env: ILOC: d:/a/local + png_ver: 1643 jobs: 
build: @@ -16,19 +17,26 @@ runs-on: windows-latest steps: - uses: ilammy/setup-nasm@v1 + - uses: microsoft/setup-msbuild@v2 - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: submodules: recursive - run: git fetch --prune --unshallow --tags + - name: Get the version id: get_version + continue-on-error: true run: | $git_info=$(git describe --tags HEAD) + $stamp=$(date +'%Y-%m-%d_%H%M%S') echo "version=${git_info}" >> $env:GITHUB_OUTPUT + echo "stamp=${stamp}" >> $env:GITHUB_OUTPUT + - name: Setup Installation Location run: | mkdir ${{env.ILOC}} + - name: Uninstall Perl run: | choco uninstall strawberryperl @@ -39,17 +47,17 @@ git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git cd zlib-ng cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF - cmake --build build --config Release --target install + cmake --build build --target install cd .. - name: Build and Install libpng shell: cmd run: | - curl -sSL -o lpng1639.zip https://download.sourceforge.net/libpng/lpng1640.zip - unzip.exe -qq lpng1640.zip - cd lpng1639 - cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF - cmake --build build --config Release --target install + curl -sSL -o lpng${{env.png_ver}}.zip https://download.sourceforge.net/libpng/lpng${{env.png_ver}}.zip + unzip.exe -qq lpng${{env.png_ver}}.zip + cd lpng${{env.png_ver}} + cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF + cmake --build build --target install cd .. 
- name: Build and Install libjpeg @@ -57,8 +65,8 @@ run: | git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git cd libjpeg-turbo - cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF - cmake --build build --config Release --target install + cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF + cmake --build build --target install cd .. - name: Build and Install jbigkit @@ -67,7 +75,7 @@ git clone --depth 1 https://github.com/zdenop/jbigkit.git cd jbigkit cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF - cmake --build build --config Release --target install + cmake --build build --target install cd .. - name: Build and Install libtiff @@ -75,8 +83,8 @@ run: | git clone -c advice.detachedHead=false -b "v4.6.0" --depth 1 https://gitlab.com/libtiff/libtiff.git cd libtiff - cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF - cmake --build build --config Release --target install + cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF + cmake --build build --target install cd .. - name: Build and Install leptonica @@ -85,8 +93,8 @@ echo "Building leptonica..." 
git clone --depth 1 https://github.com/DanBloomberg/leptonica.git cd leptonica - cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON - cmake --build build --config Release --target install + cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON + cmake --build build --target install - name: Remove not needed tools Before building tesseract shell: cmd @@ -96,13 +104,13 @@ - name: Build and Install tesseract shell: cmd run: | - cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF - cmake --build build --config Release --target install + cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=OFF + cmake --build build --target install - name: Upload Build Results - uses: actions/upload-artifact@v3.1.1 + uses: actions/upload-artifact@v4 with: - name: tesseract-${{ steps.get_version.outputs.version }}-VS2019_win64 + name: tesseract-${{ steps.get_version.outputs.version }}-${{steps.get_version.outputs.stamp}}-VS2019_win64 path: ${{env.ILOC}} retention-days: 5 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/cmake.yml new/tesseract-5.4.1/.github/workflows/cmake.yml --- old/tesseract-5.4.0/.github/workflows/cmake.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/cmake.yml 2024-06-11 20:18:21.000000000 +0200 @@ -133,7 +133,7 @@ run: | export 
"PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH" cd test - ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11 + ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++17 ./basicapitest if: runner.os == 'Linux' @@ -141,7 +141,7 @@ run: | export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH" cd test - ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++11 + ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++17 ./basicapitest if: runner.os == 'macOS' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/.github/workflows/sw.yml new/tesseract-5.4.1/.github/workflows/sw.yml --- old/tesseract-5.4.0/.github/workflows/sw.yml 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/.github/workflows/sw.yml 2024-06-11 20:18:21.000000000 +0200 @@ -88,14 +88,14 @@ - name: Upload Unit Test Results if: always() && matrix.os != 'windows-2022' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Test Results (${{ matrix.os }}) path: .sw/test/results.xml - name: Publish Test Report if: always() && matrix.os != 'windows-2022' - uses: mikepenz/action-junit-report@v3 + uses: 
mikepenz/action-junit-report@v4 with: check_name: test (${{ matrix.os }}) report_paths: .sw/test/results.xml diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/CMakeLists.txt new/tesseract-5.4.1/CMakeLists.txt --- old/tesseract-5.4.0/CMakeLists.txt 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/CMakeLists.txt 2024-06-11 20:18:21.000000000 +0200 @@ -927,12 +927,19 @@ DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig RENAME tesseract.pc) install(TARGETS tesseract DESTINATION bin) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:${PROJECT_NAME}> DESTINATION bin OPTIONAL) +endif() install( TARGETS libtesseract EXPORT TesseractTargets RUNTIME DESTINATION bin + RUNTIME DESTINATION bin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:libtesseract> DESTINATION bin OPTIONAL) +endif() install( EXPORT TesseractTargets NAMESPACE Tesseract:: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/CONTRIBUTING.md new/tesseract-5.4.1/CONTRIBUTING.md --- old/tesseract-5.4.0/CONTRIBUTING.md 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/CONTRIBUTING.md 2024-06-11 20:18:21.000000000 +0200 @@ -7,6 +7,7 @@ If you think you found a bug in Tesseract, please create an issue. Use the [user forum](https://groups.google.com/g/tesseract-ocr) instead of creating an issue if ... + * You have problems using Tesseract and need some help. * You have problems installing the software. * You are not satisfied with the accuracy of the OCR, and want to ask how you can improve it. Note: You should first read the [ImproveQuality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) documentation. @@ -14,9 +15,10 @@ * You have a general question. 
An issue should only be reported if the platform you are using is one of these: - * Linux (but not a version that is more than 4 years old) - * Windows (Windows 7 or newer version) - * macOS (last 3 releases) + +* Linux (but not a version that is more than 4 years old) +* Windows (Windows 7 or newer version) +* macOS (last 3 releases) For older versions or other operating systems, use the Tesseract forum. @@ -39,8 +41,9 @@ BUT don't post files with private info (about yourself or others). When attaching a file to the issue report / forum ... - * Do not post a file larger than 20 MB. - * GitHub supports only few file name extensions like `.png` or `.txt`. If GitHub rejects your files, you can compress them using a program that can produce a zip archive and then load this zip file to GitHub. + +* Do not post a file larger than 20 MB. +* GitHub supports only few file name extensions like `.png` or `.txt`. If GitHub rejects your files, you can compress them using a program that can produce a zip archive and then load this zip file to GitHub. Do not attach programs or libraries to your issues/posts. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/ChangeLog new/tesseract-5.4.1/ChangeLog --- old/tesseract-5.4.0/ChangeLog 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/ChangeLog 2024-06-11 20:18:21.000000000 +0200 @@ -1,3 +1,7 @@ +2024-06-11 - V5.4.1 +* Avoid FP overflow in NormEvidenceOf (fixes issue #4257) (#4259) +* Small build fixes and code improvements (#4262, #4263, #4266, #4267) + 2024-06-06 - V5.4.0 * Small build fixes and code improvements (#4241, #4243, #4244, #4245, #4246, #4248, #4249, #4250, #4253) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/INSTALL.GIT.md new/tesseract-5.4.1/INSTALL.GIT.md --- old/tesseract-5.4.0/INSTALL.GIT.md 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/INSTALL.GIT.md 2024-06-11 20:18:21.000000000 +0200 @@ -1,4 +1,4 @@ -# autotools (LINUX/UNIX , msys...) +## autotools (LINUX/UNIX , msys...) If you have cloned Tesseract from GitHub, you must generate the configure script. @@ -9,22 +9,23 @@ You need Leptonica 1.74.2 (minimum) for Tesseract 4.0x. Known dependencies for training tools (excluding leptonica): - * compiler with c++11 support - * automake - * pkg-config - * pango-devel - * cairo-devel - * icu-devel + +* compiler with c++17 support +* automake +* pkg-config +* pango-devel +* cairo-devel +* icu-devel So, the steps for making Tesseract are: - $ ./autogen.sh - $ ./configure - $ make - $ sudo make install - $ sudo ldconfig - $ make training - $ sudo make training-install + ./autogen.sh + ./configure + make + sudo make install + sudo ldconfig + make training + sudo make training-install You need to install at least English language and OSD traineddata files to `TESSDATA_PREFIX` directory. @@ -35,8 +36,7 @@ (Repository is huge - more that 1.2 GB. You do NOT need to download traineddata files for all languages). 
- $ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata - + git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata You need an Internet connection and [curl](https://curl.haxx.se/) to compile `ScrollView.jar` because the build will automatically download @@ -46,22 +46,20 @@ Just run: - $ make ScrollView.jar + make ScrollView.jar and follow the instruction on [Viewer Debugging](https://tesseract-ocr.github.io/tessdoc/ViewerDebugging.html). - -# CMAKE +## cmake There is alternative build system based on multiplatform [cmake](https://cmake.org/) -## LINUX - - $ mkdir build - $ cd build && cmake .. && make - $ sudo make install +### LINUX + mkdir build + cd build && cmake .. && make + sudo make install -## WINDOWS +### WINDOWS See the [documentation](https://tesseract-ocr.github.io/tessdoc/) for more information on this. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/VERSION new/tesseract-5.4.1/VERSION --- old/tesseract-5.4.0/VERSION 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/VERSION 2024-06-11 20:18:21.000000000 +0200 @@ -1 +1 @@ -5.4.0 +5.4.1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/configure.ac new/tesseract-5.4.1/configure.ac --- old/tesseract-5.4.0/configure.ac 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/configure.ac 2024-06-11 20:18:21.000000000 +0200 @@ -29,7 +29,7 @@ # Define date of package, etc. Could be useful in auto-generated # documentation. 
PACKAGE_YEAR=2024 -PACKAGE_DATE="06/06" +PACKAGE_DATE="06/11" abs_top_srcdir=`AS_DIRNAME([$0])` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/doc/generate_manpages.sh new/tesseract-5.4.1/doc/generate_manpages.sh --- old/tesseract-5.4.0/doc/generate_manpages.sh 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/doc/generate_manpages.sh 2024-06-11 20:18:21.000000000 +0200 @@ -25,9 +25,9 @@ else for src in *.asc; do pagename=${src/.asc/} - (${asciidoc} -d manpage ${src} && - ${asciidoc} -d manpage -b docbook ${src} && - ${xsltproc} --nonet ${man_xslt} ${pagename}.xml) || + (${asciidoc} -d manpage "${src}" && + ${asciidoc} -d manpage -b docbook "${src}" && + ${xsltproc} --nonet ${man_xslt} "${pagename}".xml) || echo "Error generating ${pagename}" done fi diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/api/pdfrenderer.cpp new/tesseract-5.4.1/src/api/pdfrenderer.cpp --- old/tesseract-5.4.0/src/api/pdfrenderer.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/api/pdfrenderer.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -242,13 +242,13 @@ double word_length; double x, y; { - int px = word_x1; - int py = word_y1; double l2 = dist2(line_x1, line_y1, line_x2, line_y2); if (l2 == 0) { x = line_x1; y = line_y1; } else { + int px = word_x1; + int py = word_y1; double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2; x = line_x2 + t * (line_x2 - line_x1); y = line_y2 + t * (line_y2 - line_y1); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/arch/intsimdmatrixavx2.cpp new/tesseract-5.4.1/src/arch/intsimdmatrixavx2.cpp --- old/tesseract-5.4.0/src/arch/intsimdmatrixavx2.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/arch/intsimdmatrixavx2.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -568,7 +568,6 @@ output += 
group_size; } group_size /= 2; - w_step /= 2; if (output + group_size <= rounded_num_out) { PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/applybox.cpp new/tesseract-5.4.1/src/ccmain/applybox.cpp --- old/tesseract-5.4.0/src/ccmain/applybox.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/applybox.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -258,10 +258,10 @@ } const double e = exp(1.0); // The base of natural logs. unsigned blob_number; - int right_chop_index = 0; if (!assume_fixed_pitch_char_segment) { // We only chop if the language is not fixed pitch like CJK. SEAM *seam = nullptr; + int right_chop_index = 0; while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) { word_res->InsertSeam(blob_number, seam); BLOB_CHOICE *left_choice = blob_choices[blob_number]; @@ -685,6 +685,7 @@ void Tesseract::TidyUp(PAGE_RES *page_res) { int ok_blob_count = 0; int bad_blob_count = 0; + // TODO: check usage of ok_word_count. int ok_word_count = 0; int unlabelled_words = 0; PAGE_RES_IT pr_it(page_res); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/control.cpp new/tesseract-5.4.1/src/ccmain/control.cpp --- old/tesseract-5.4.0/src/ccmain/control.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/control.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -949,6 +949,7 @@ } real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr); AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs); + // TODO: check code. 
int non_overlapped = 0; int non_overlapped_used = 0; for (unsigned i = 0; i < word_wanted.size(); ++i) { @@ -1121,9 +1122,9 @@ C_BLOB *blob, const std::vector<C_OUTLINE *> &outlines, int num_outlines, std::vector<bool> *ok_outlines) { - std::string best_str; float target_cert = certainty_threshold; if (blob != nullptr) { + std::string best_str; float target_c2; target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2); if (debug_noise_removal) { @@ -1797,9 +1798,6 @@ } bool Tesseract::check_debug_pt(WERD_RES *word, int location) { - bool show_map_detail = false; - int16_t i; - if (!test_pt) { return false; } @@ -1811,6 +1809,7 @@ if (location < 0) { return true; // For breakpoint use } + bool show_map_detail = false; tessedit_rejection_debug.set_value(true); debug_x_ht_level.set_value(2); tprintf("\n\nTESTWD::"); @@ -1864,7 +1863,7 @@ tprintf("\n"); if (show_map_detail) { tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str()); - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { + for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); word->reject_map[i].full_print(debug_fp); } @@ -1891,13 +1890,12 @@ int16_t *font_out, // output font int8_t *font_count // output count ) { - int16_t font; // font index - int32_t count; // pile count - if (fonts->get_total() > 0) { - font = static_cast<int16_t>(fonts->mode()); + // font index + int16_t font = static_cast<int16_t>(fonts->mode()); *font_out = font; - count = fonts->pile_count(font); + // pile count + int32_t count = fonts->pile_count(font); *font_count = count < INT8_MAX ? 
count : INT8_MAX; fonts->add(font, -*font_count); } else { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/docqual.cpp new/tesseract-5.4.1/src/ccmain/docqual.cpp --- old/tesseract-5.4.0/src/ccmain/docqual.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/docqual.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -60,10 +60,10 @@ } int16_t Tesseract::word_outline_errs(WERD_RES *word) { - int16_t i = 0; int16_t err_count = 0; if (word->rebuild_word != nullptr) { + int16_t i = 0; for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) { TBLOB *blob = word->rebuild_word->blobs[b]; err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines()); @@ -209,13 +209,8 @@ void Tesseract::doc_and_block_rejection( // reject big chunks PAGE_RES_IT &page_res_it, bool good_quality_doc) { - int16_t block_no = 0; - int16_t row_no = 0; BLOCK_RES *current_block; - ROW_RES *current_row; - bool rej_word; - bool prev_word_rejected; int16_t char_quality = 0; int16_t accepted_char_quality; @@ -238,7 +233,7 @@ WERD_RES *word; while ((word = page_res_it.word()) != nullptr) { current_block = page_res_it.block(); - block_no = current_block->block->pdblk.index(); + int16_t block_no = current_block->block->pdblk.index(); if (current_block->char_count > 0 && (current_block->rej_count * 100.0 / current_block->char_count) > tessedit_reject_block_percent) { @@ -246,8 +241,9 @@ tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no, current_block->char_count, current_block->rej_count); } - prev_word_rejected = false; + bool prev_word_rejected = false; while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) { + bool rej_word; if (tessedit_preserve_blk_rej_perfect_wds) { rej_word = word->reject_map.reject_count() > 0 || word->reject_map.length() < tessedit_preserve_min_wd_len; @@ -284,9 +280,9 @@ } /* Walk rows in block testing for row 
rejection */ - row_no = 0; + int16_t row_no = 0; while (page_res_it.word() != nullptr && page_res_it.block() == current_block) { - current_row = page_res_it.row(); + ROW_RES *current_row = page_res_it.row(); row_no++; /* Reject whole row if: fraction of chars on row which are rejected exceed a limit AND @@ -302,9 +298,10 @@ tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no, current_row->char_count, current_row->rej_count); } - prev_word_rejected = false; + bool prev_word_rejected = false; while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) { /* Preserve words on good docs unless they are mostly rejected*/ + bool rej_word; if (!tessedit_row_rej_good_docs && good_quality_doc) { rej_word = word->reject_map.reject_count() / static_cast<float>(word->reject_map.length()) > @@ -448,8 +445,6 @@ } bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) { - float rating_per_ch; - int adjusted_len; int crunch_mode = 0; if (word->best_choice->unichar_string().empty() || @@ -457,11 +452,11 @@ word->best_choice->unichar_string().size())) { crunch_mode = 1; } else { - adjusted_len = word->reject_map.length(); + int adjusted_len = word->reject_map.length(); if (adjusted_len > crunch_rating_max) { adjusted_len = crunch_rating_max; } - rating_per_ch = word->best_choice->rating() / adjusted_len; + float rating_per_ch = word->best_choice->rating() / adjusted_len; if (rating_per_ch > crunch_terrible_rating) { crunch_mode = 2; @@ -528,7 +523,6 @@ } void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) { - WERD_RES *word; PAGE_RES_IT copy_it; bool deleting_from_bol = false; bool marked_delete_point = false; @@ -539,7 +533,7 @@ page_res_it.restart_page(); while (page_res_it.word() != nullptr) { - word = page_res_it.word(); + WERD_RES *word = page_res_it.word(); delete_mode = word_deletable(word, debug_delete_mode); if (delete_mode != CR_NONE) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/fixspace.cpp new/tesseract-5.4.1/src/ccmain/fixspace.cpp --- old/tesseract-5.4.0/src/ccmain/fixspace.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/fixspace.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) { int16_t best_score; WERD_RES_LIST current_perm; - int16_t current_score; bool improved = false; best_score = eval_word_spacing(best_perm); // default score @@ -183,7 +182,7 @@ while ((best_score != PERFECT_WERDS) && !current_perm.empty()) { match_current_words(current_perm, row, block); - current_score = eval_word_spacing(current_perm); + int16_t current_score = eval_word_spacing(current_perm); dump_words(current_perm, current_score, 2, improved); if (current_score > best_score) { best_perm.clear(); @@ -201,11 +200,10 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { WERD_RES_IT src_it(&src_list); WERD_RES_IT new_it(&new_list); - WERD_RES *src_wd; WERD_RES *new_wd; for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - src_wd = src_it.data(); + WERD_RES *src_wd = src_it.data(); if (!src_wd->combination) { new_wd = WERD_RES::deep_copy(src_wd); new_wd->combination = false; @@ -269,8 +267,6 @@ bool prev_char_1 = false; // prev ch a "1/I/l"? 
bool prev_char_digit = false; // prev ch 2..9 or 0 const char *punct_chars = "!\"`',.:;"; - bool prev_char_punct = false; - do { // current word WERD_RES *word = word_res_it.data(); @@ -327,6 +323,7 @@ /* Add 1 to total score for every joined punctuation regardless of context and rejtn */ if (tessedit_prefer_joined_punct) { + bool prev_char_punct; for (i = 0, offset = 0, prev_char_punct = false; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) { bool current_char_punct = @@ -393,8 +390,6 @@ WERD_RES_IT prev_word_it(&words); WERD_RES *word; WERD_RES *prev_word; - WERD_RES *combo; - WERD *copy_word; int16_t prev_right = -INT16_MAX; TBOX box; int16_t gap; @@ -425,12 +420,13 @@ gap = box.left() - prev_right; if (gap <= min_gap) { prev_word = prev_word_it.data(); + WERD_RES *combo; if (prev_word->combination) { combo = prev_word; } else { /* Make a new combination and insert before * the first word being joined. */ - copy_word = new WERD; + auto *copy_word = new WERD; *copy_word = *(prev_word->word); // deep copy combo = new WERD_RES(copy_word); @@ -546,7 +542,6 @@ WERD_RES *word_res; WERD_RES_LIST sub_word_list; WERD_RES_IT sub_word_list_it(&sub_word_list); - int16_t blob_index; int16_t new_length; float junk; @@ -556,7 +551,7 @@ return; } - blob_index = worst_noise_blob(word_res, &junk); + auto blob_index = worst_noise_blob(word_res, &junk); if (blob_index < 0) { return; } @@ -623,7 +618,6 @@ WERD_RES_IT worst_word_it; float worst_noise_score = 9999; int worst_blob_index = -1; // Noisiest blob of noisiest wd - int blob_index; // of wds noisiest blob float noise_score; // of wds noisiest blob WERD_RES *word_res; C_BLOB_IT blob_it; @@ -636,7 +630,7 @@ int16_t i; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - blob_index = worst_noise_blob(word_it.data(), &noise_score); + auto blob_index = worst_noise_blob(word_it.data(), &noise_score); if (blob_index > -1 && worst_noise_score > noise_score) { worst_noise_score = 
noise_score; worst_blob_index = blob_index; @@ -806,7 +800,6 @@ void fixspace_dbg(WERD_RES *word) { TBOX box = word->word->bounding_box(); const bool show_map_detail = false; - int16_t i; box.print(); tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str()); @@ -816,7 +809,7 @@ tprintf("\n"); if (show_map_detail) { tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str()); - for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { + for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) { tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]); word->reject_map[i].full_print(debug_fp); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/output.cpp new/tesseract-5.4.1/src/ccmain/output.cpp --- old/tesseract-5.4.0/src/ccmain/output.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/output.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -101,11 +101,11 @@ bool force_eol) { // override tilde crunch? WERD_RES *word = page_res_it.word(); const UNICHARSET &uchset = *word->uch_set; - bool need_reject = false; UNICHAR_ID space = uchset.unichar_to_id(" "); if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) && !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) { + bool need_reject = false; if ((word->unlv_crunch_mode != CR_DELETE) && (!stats_.tilde_crunch_written || ((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) && diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/paragraphs.cpp new/tesseract-5.4.1/src/ccmain/paragraphs.cpp --- old/tesseract-5.4.0/src/ccmain/paragraphs.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/paragraphs.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -2407,8 +2407,8 @@ // Set up text, lword_text, and rword_text (mostly for debug printing). 
std::string fake_text; PageIterator pit(static_cast<const PageIterator &>(it)); - bool first_word = true; if (!pit.Empty(RIL_WORD)) { + bool first_word = true; do { fake_text += "x"; if (first_word) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/pgedit.cpp new/tesseract-5.4.1/src/ccmain/pgedit.cpp --- old/tesseract-5.4.0/src/ccmain/pgedit.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/pgedit.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -703,9 +703,7 @@ WERD_RES *word_res = pr_it->word(); WERD *word = word_res->word; TBOX word_bb; // word bounding box - int word_height; // ht of word BB bool displayed_something = false; - float shift; // from bot left if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) { # ifndef DISABLED_LEGACY_ENGINE @@ -842,13 +840,14 @@ if (text.length() > 0) { word_bb = word->bounding_box(); image_win->Pen(ScrollView::RED); - word_height = word_bb.height(); - int text_height = 0.50 * word_height; + auto word_height = word_bb.height(); + int text_height = word_height / 2; if (text_height > 20) { text_height = 20; } image_win->TextAttributes("Arial", text_height, false, false, false); - shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f; + // from bot left + float shift = (word_height < word_bb.width()) ? 
0.25f * word_height : 0.0f; image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str()); if (blame.length() > 0) { image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/ccmain/reject.cpp new/tesseract-5.4.1/src/ccmain/reject.cpp --- old/tesseract-5.4.0/src/ccmain/reject.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/ccmain/reject.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -293,8 +293,6 @@ int16_t i; int16_t offset; bool non_conflict_set_char; // non conf set a/n? - bool conflict = false; - bool allow_1s; ACCEPTABLE_WERD_TYPE word_type; bool dict_perm_type; bool dict_word_ok; @@ -411,11 +409,11 @@ Else reject all conflict chs */ if (word_contains_non_1_digit(word, lengths)) { - allow_1s = + bool allow_1s = (alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM); int16_t offset; - conflict = false; + bool conflict = false; for (i = 0, offset = 0; word[offset] != '\0'; offset += word_res->best_choice->unichar_lengths()[i++]) { if ((!allow_1s || (word[offset] != '1')) && diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/classify/intmatcher.cpp new/tesseract-5.4.1/src/classify/intmatcher.cpp --- old/tesseract-5.4.0/src/classify/intmatcher.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/classify/intmatcher.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -892,7 +892,6 @@ uint16_t ProtoNum; uint8_t ProtoWordNum; PROTO_SET_STRUCT *ProtoSet; - uint16_t ActualProtoNum; if (PrintMatchSummaryOn(Debug)) { tprintf("Configuration Mask:\n"); @@ -912,9 +911,8 @@ if (PrintMatchSummaryOn(Debug)) { tprintf("Proto Mask:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for 
(ProtoWordNum = 0; ProtoWordNum < 2; ProtoWordNum++, ProtoMask++) { - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1)) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -934,7 +932,7 @@ tprintf("Proto Evidence:\n"); for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -991,7 +989,6 @@ const ScratchEvidence &tables, bool SeparateDebugWindows) { uint16_t ProtoNum; - uint16_t ActualProtoNum; PROTO_SET_STRUCT *ProtoSet; int ProtoSetIndex; @@ -1003,7 +1000,7 @@ for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; + uint16_t ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET; for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < ClassTemplate->NumProtos)); ProtoNum++, ActualProtoNum++) { @@ -1076,13 +1073,12 @@ uint16_t ProtoNum; PROTO_SET_STRUCT *ProtoSet; int NumProtos; - uint16_t ActualProtoNum; NumProtos = ClassTemplate->NumProtos; for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) { ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex]; - ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); + uint16_t ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET); for (ProtoNum = 0; ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos)); ProtoNum++, ActualProtoNum++) { int temp = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/tesseract-5.4.0/src/classify/normmatch.cpp new/tesseract-5.4.1/src/classify/normmatch.cpp --- old/tesseract-5.4.0/src/classify/normmatch.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/classify/normmatch.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -105,13 +105,17 @@ return (1 - NormEvidenceOf(Match)); } - float BestMatch = FLT_MAX; - LIST Protos = NormProtos->Protos[ClassId]; - if (DebugMatch) { tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId)); } + LIST Protos = NormProtos->Protos[ClassId]; + if (Protos == nullptr) { + // Avoid FP overflow in NormEvidenceOf. + return 1.0f; + } + + float BestMatch = FLT_MAX; iterate(Protos) { auto Proto = reinterpret_cast<PROTOTYPE *>(Protos->first_node()); float Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY]; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/dict/dict.cpp new/tesseract-5.4.1/src/dict/dict.cpp --- old/tesseract-5.4.0/src/dict/dict.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/dict/dict.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -886,7 +886,7 @@ } WERD_CHOICE new_word(word.unicharset()); auto last_index = word.length() - 1; - int new_len = 0; + int new_len; for (unsigned i = 0; i <= last_index; ++i) { UNICHAR_ID unichar_id = (word.unichar_id(i)); if (getUnicharset().get_ispunctuation(unichar_id)) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/textord/tablefind.cpp new/tesseract-5.4.1/src/textord/tablefind.cpp --- old/tesseract-5.4.0/src/textord/tablefind.cpp 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/textord/tablefind.cpp 2024-06-11 20:18:21.000000000 +0200 @@ -884,8 +884,6 @@ } // Variables used to compute inter-blob spacing. - int current_x0 = -1; - int current_x1 = -1; int previous_x1 = -1; // Stores the maximum gap detected. 
int largest_partition_gap_found = -1; @@ -897,8 +895,8 @@ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX *blob = it.data(); - current_x0 = blob->bounding_box().left(); - current_x1 = blob->bounding_box().right(); + int current_x0 = blob->bounding_box().left(); + int current_x1 = blob->bounding_box().right(); if (previous_x1 != -1) { int gap = current_x0 - previous_x1; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/src/training/CMakeLists.txt new/tesseract-5.4.1/src/training/CMakeLists.txt --- old/tesseract-5.4.0/src/training/CMakeLists.txt 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/src/training/CMakeLists.txt 2024-06-11 20:18:21.000000000 +0200 @@ -126,6 +126,9 @@ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) generate_export_header(common_training EXPORT_MACRO_NAME TESS_COMMON_TRAINING_API) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:common_training> DESTINATION bin OPTIONAL) +endif() project_group(common_training "Training Tools") # ############################################################################## @@ -141,6 +144,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:ambiguous_words> DESTINATION bin OPTIONAL) + endif() endif() # ############################################################################## @@ -156,6 +162,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:classifier_tester> DESTINATION bin OPTIONAL) + endif() endif() # ############################################################################## @@ -170,6 +179,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:combine_tessdata> DESTINATION bin OPTIONAL) +endif() # ############################################################################## # EXECUTABLE cntraining @@ -184,6 
+196,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:cntraining> DESTINATION bin OPTIONAL) + endif() endif() # ############################################################################## @@ -198,6 +213,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:dawg2wordlist> DESTINATION bin OPTIONAL) +endif() # ############################################################################## # EXECUTABLE mftraining @@ -212,6 +230,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:mftraining> DESTINATION bin OPTIONAL) + endif() endif() # ############################################################################## @@ -227,6 +248,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:shapeclustering> DESTINATION bin OPTIONAL) + endif() endif() # ############################################################################## @@ -241,9 +265,11 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +if (MSVC) + install(FILES $<TARGET_PDB_FILE:wordlist2dawg> DESTINATION bin OPTIONAL) +endif() if(ICU_FOUND) - if(NOT SW_BUILD) include_directories(${ICU_INCLUDE_DIRS}) endif() @@ -272,6 +298,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:unicharset_training> DESTINATION bin OPTIONAL) + endif() generate_export_header(unicharset_training EXPORT_MACRO_NAME TESS_UNICHARSET_TRAINING_API) project_group(unicharset_training "Training Tools") @@ -288,6 +317,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:combine_lang_model> DESTINATION bin OPTIONAL) + endif() # 
############################################################################ # EXECUTABLE lstmeval @@ -301,6 +333,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:lstmeval> DESTINATION bin OPTIONAL) + endif() # ############################################################################ # EXECUTABLE lstmtraining @@ -314,6 +349,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:lstmtraining> DESTINATION bin OPTIONAL) + endif() # ############################################################################ # EXECUTABLE merge_unicharsets @@ -327,6 +365,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:merge_unicharsets> DESTINATION bin OPTIONAL) + endif() # ############################################################################ # EXECUTABLE set_unicharset_properties @@ -340,6 +381,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:set_unicharset_properties> DESTINATION bin OPTIONAL) + endif() # ############################################################################ # EXECUTABLE unicharset_extractor @@ -354,6 +398,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) + if (MSVC) + install(FILES $<TARGET_PDB_FILE:unicharset_extractor> DESTINATION bin OPTIONAL) + endif() # ############################################################################ @@ -410,7 +457,9 @@ RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) - + if (MSVC) + install(FILES $<TARGET_PDB_FILE:text2image> DESTINATION bin OPTIONAL) + endif() endif() endif(ICU_FOUND) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/unittest/README.md new/tesseract-5.4.1/unittest/README.md --- 
old/tesseract-5.4.0/unittest/README.md 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/unittest/README.md 2024-06-11 20:18:21.000000000 +0200 @@ -1,9 +1,9 @@ # Unit Testing for Tesseract - ## Requirements ### Files and structure + ``` ├── langdata_lstm @@ -75,7 +75,6 @@ * [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf) * [UnBatang.ttf](https://raw.githubusercontent.com/byrongibson/fonts/master/backup/truetype.original/unfonts-core/UnBatang.ttf) - ## Run tests To run the tests, do the following in tesseract folder diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/tesseract-5.4.0/unittest/fuzzers/oss-fuzz-build.sh new/tesseract-5.4.1/unittest/fuzzers/oss-fuzz-build.sh --- old/tesseract-5.4.0/unittest/fuzzers/oss-fuzz-build.sh 2024-06-06 15:29:45.000000000 +0200 +++ new/tesseract-5.4.1/unittest/fuzzers/oss-fuzz-build.sh 2024-06-11 20:18:21.000000000 +0200 @@ -15,22 +15,22 @@ # ################################################################################ -cd $SRC/leptonica +cd "$SRC"/leptonica ./autogen.sh ./configure --disable-shared -make SUBDIRS=src install -j$(nproc) +make SUBDIRS=src install -j"$(nproc)" ldconfig -cd $SRC/tesseract +cd "$SRC"/tesseract ./autogen.sh CXXFLAGS="$CXXFLAGS -D_GLIBCXX_DEBUG" ./configure --disable-graphics --disable-shared -make -j$(nproc) +make -j"$(nproc)" # Get the models which are needed for the fuzzers. 
-mkdir -p $OUT/tessdata +mkdir -p "$OUT"/tessdata ( -cd $OUT/tessdata +cd "$OUT"/tessdata test -f eng.traineddata || \ curl -L -O https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata ) @@ -44,9 +44,9 @@ LIBTIFF_LIBS=$(pkg-config --static --libs libtiff-4 | sed 's/ -lm//') $CXX $CXXFLAGS \ - -I $SRC/tesseract/include \ - $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api \ - $SRC/tesseract/.libs/libtesseract.a \ + -I "$SRC"/tesseract/include \ + "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api \ + "$SRC"/tesseract/.libs/libtesseract.a \ $LEPTONICA_CFLAGS \ -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \ $LIB_FUZZING_ENGINE @@ -54,9 +54,9 @@ $CXX $CXXFLAGS \ -DTESSERACT_FUZZER_WIDTH=512 \ -DTESSERACT_FUZZER_HEIGHT=256 \ - -I $SRC/tesseract/include \ - $SRC/tesseract/unittest/fuzzers/fuzzer-api.cpp -o $OUT/fuzzer-api-512x256 \ - $SRC/tesseract/.libs/libtesseract.a \ + -I "$SRC"/tesseract/include \ + "$SRC"/tesseract/unittest/fuzzers/fuzzer-api.cpp -o "$OUT"/fuzzer-api-512x256 \ + "$SRC"/tesseract/.libs/libtesseract.a \ $LEPTONICA_CFLAGS \ -Wl,-Bstatic $LEPTONICA_LIBS $LIBTIFF_LIBS -Wl,-Bdynamic \ $LIB_FUZZING_ENGINE