Hello community, here is the log from the commit of package python-pytesseract for openSUSE:Factory checked in at 2019-07-22 17:20:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-pytesseract (Old) and /work/SRC/openSUSE:Factory/.python-pytesseract.new.4126 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-pytesseract" Mon Jul 22 17:20:24 2019 rev:5 rq:717602 version:0.2.7 Changes: -------- --- /work/SRC/openSUSE:Factory/python-pytesseract/python-pytesseract.changes 2019-05-16 22:07:10.954441639 +0200 +++ /work/SRC/openSUSE:Factory/.python-pytesseract.new.4126/python-pytesseract.changes 2019-07-22 17:20:25.721893847 +0200 @@ -1,0 +2,6 @@ +Mon Jul 22 13:31:45 UTC 2019 - Tomáš Chvátal <tchva...@suse.com> + +- Update to 0.2.7: + * no upstream changelog + +------------------------------------------------------------------- Old: ---- LICENSE pytesseract-0.2.6.tar.gz New: ---- pytesseract-0.2.7.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-pytesseract.spec ++++++ --- /var/tmp/diff_new_pack.3vYOXk/_old 2019-07-22 17:20:26.249893703 +0200 +++ /var/tmp/diff_new_pack.3vYOXk/_new 2019-07-22 17:20:26.249893703 +0200 @@ -18,14 +18,13 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-pytesseract -Version: 0.2.6 +Version: 0.2.7 Release: 0 Summary: Python wrapper for Google's Tesseract-OCR License: GPL-3.0-only Group: Development/Languages/Python URL: https://github.com/madmaze/python-tesseract Source: https://files.pythonhosted.org/packages/source/p/pytesseract/pytesseract-%{version}.tar.gz -Source10: https://raw.githubusercontent.com/madmaze/pytesseract/v%{version}/LICENSE BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: python-rpm-macros @@ -57,7 +56,6 @@ %prep %setup -q -n pytesseract-%{version} sed -i -e '/^#!\//, 1d' src/pytesseract.py -cp %{SOURCE10} . %build %python_build ++++++ pytesseract-0.2.6.tar.gz -> pytesseract-0.2.7.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytesseract-0.2.6/PKG-INFO new/pytesseract-0.2.7/PKG-INFO --- old/pytesseract-0.2.6/PKG-INFO 2018-12-16 03:37:50.000000000 +0100 +++ new/pytesseract-0.2.7/PKG-INFO 2019-06-19 04:40:12.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: pytesseract -Version: 0.2.6 +Version: 0.2.7 Summary: Python-tesseract is a python wrapper for Google's Tesseract-OCR Home-page: https://github.com/madmaze/python-tesseract Author: Matthias Lee @@ -55,6 +55,13 @@ # French text image to string print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra')) + # In order to bypass the image conversions of pytesseract, just use relative or absolute image path + # NOTE: In this case you should provide tesseract supported images or tesseract will return error + print(pytesseract.image_to_string('test.png')) + + # Batch processing with a single file containing the list of multiple image file paths + print(pytesseract.image_to_string('images.txt')) + # Get bounding box estimates print(pytesseract.image_to_boxes(Image.open('test.png'))) @@ -64,14 +71,10 @@ # Get information about orientation and script detection print(pytesseract.image_to_osd(Image.open('test.png'))) - # In order to bypass the internal image conversions, just use relative or absolute image path - # NOTE: If you don't use supported images, tesseract will return error - print(pytesseract.image_to_string('test.png')) - - # get a searchable PDF + # Get a searchable PDF pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf') - # get HOCR output + # Get HOCR output hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr') Support for OpenCV image/NumPy array objects @@ -89,13 +92,11 @@ .. code-block:: python - tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' # Example config: r'--tessdata-dir "C:\Program Files (x86)\Tesseract-OCR\tessdata"' # It's important to add double quotes around the dir path. - + tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' pytesseract.image_to_string(image, lang='chi_sim', config=tessdata_dir_config) - **Functions** * **get_tesseract_version** Returns the Tesseract version installed in the system. @@ -128,7 +129,7 @@ Prerequisites: - - Python-tesseract requires python 2.6+ or python 3.x + - Python-tesseract requires python 2.7+ or python 3.x - You will need the Python Imaging Library (PIL) (or the `Pillow <https://pypi.org/project/Pillow/>`_ fork). Under Debian/Ubuntu, this is the package **python-imaging** or **python3-imaging**. - Install `Google Tesseract OCR <https://github.com/tesseract-ocr/tesseract>`_ @@ -176,5 +177,5 @@ Keywords: python-tesseract OCR Python Platform: UNKNOWN Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytesseract-0.2.6/README.rst new/pytesseract-0.2.7/README.rst --- old/pytesseract-0.2.6/README.rst 2018-10-05 03:46:37.000000000 +0200 +++ new/pytesseract-0.2.7/README.rst 2019-06-19 04:40:00.000000000 +0200 @@ -46,6 +46,13 @@ # French text image to string print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra')) + # In order to bypass the image conversions of pytesseract, just use relative or absolute image path + # NOTE: In this case you should provide tesseract supported images or tesseract will return error + print(pytesseract.image_to_string('test.png')) + + # Batch processing with a single file containing the list of multiple image file paths + print(pytesseract.image_to_string('images.txt')) + # Get bounding box estimates print(pytesseract.image_to_boxes(Image.open('test.png'))) @@ -55,14 +62,10 @@ # Get information about orientation and script detection print(pytesseract.image_to_osd(Image.open('test.png'))) - # In order to bypass the internal image conversions, just use relative or absolute image path - # NOTE: If you don't use supported images, tesseract will return error - print(pytesseract.image_to_string('test.png')) - - # get a searchable PDF + # Get a searchable PDF pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf') - # get HOCR output + # Get HOCR output hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr') Support for OpenCV image/NumPy array objects @@ -80,13 +83,11 @@ .. code-block:: python - tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' # Example config: r'--tessdata-dir "C:\Program Files (x86)\Tesseract-OCR\tessdata"' # It's important to add double quotes around the dir path. - + tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' pytesseract.image_to_string(image, lang='chi_sim', config=tessdata_dir_config) - **Functions** * **get_tesseract_version** Returns the Tesseract version installed in the system. @@ -119,7 +120,7 @@ Prerequisites: -- Python-tesseract requires python 2.6+ or python 3.x +- Python-tesseract requires python 2.7+ or python 3.x - You will need the Python Imaging Library (PIL) (or the `Pillow <https://pypi.org/project/Pillow/>`_ fork). Under Debian/Ubuntu, this is the package **python-imaging** or **python3-imaging**. - Install `Google Tesseract OCR <https://github.com/tesseract-ocr/tesseract>`_ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytesseract-0.2.6/pytesseract.egg-info/PKG-INFO new/pytesseract-0.2.7/pytesseract.egg-info/PKG-INFO --- old/pytesseract-0.2.6/pytesseract.egg-info/PKG-INFO 2018-12-16 03:37:50.000000000 +0100 +++ new/pytesseract-0.2.7/pytesseract.egg-info/PKG-INFO 2019-06-19 04:40:12.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: pytesseract -Version: 0.2.6 +Version: 0.2.7 Summary: Python-tesseract is a python wrapper for Google's Tesseract-OCR Home-page: https://github.com/madmaze/python-tesseract Author: Matthias Lee @@ -55,6 +55,13 @@ # French text image to string print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra')) + # In order to bypass the image conversions of pytesseract, just use relative or absolute image path + # NOTE: In this case you should provide tesseract supported images or tesseract will return error + print(pytesseract.image_to_string('test.png')) + + # Batch processing with a single file containing the list of multiple image file paths + print(pytesseract.image_to_string('images.txt')) + # Get bounding box estimates print(pytesseract.image_to_boxes(Image.open('test.png'))) @@ -64,14 +71,10 @@ # Get information about orientation and script detection print(pytesseract.image_to_osd(Image.open('test.png'))) - # In order to bypass the internal image conversions, just use relative or absolute image path - # NOTE: If you don't use supported images, tesseract will return error - print(pytesseract.image_to_string('test.png')) - - # get a searchable PDF + # Get a searchable PDF pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf') - # get HOCR output + # Get HOCR output hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr') Support for OpenCV image/NumPy array objects @@ -89,13 +92,11 @@ .. code-block:: python - tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' # Example config: r'--tessdata-dir "C:\Program Files (x86)\Tesseract-OCR\tessdata"' # It's important to add double quotes around the dir path. - + tessdata_dir_config = r'--tessdata-dir "<replace_with_your_tessdata_dir_path>"' pytesseract.image_to_string(image, lang='chi_sim', config=tessdata_dir_config) - **Functions** * **get_tesseract_version** Returns the Tesseract version installed in the system. @@ -128,7 +129,7 @@ Prerequisites: - - Python-tesseract requires python 2.6+ or python 3.x + - Python-tesseract requires python 2.7+ or python 3.x - You will need the Python Imaging Library (PIL) (or the `Pillow <https://pypi.org/project/Pillow/>`_ fork). Under Debian/Ubuntu, this is the package **python-imaging** or **python3-imaging**. - Install `Google Tesseract OCR <https://github.com/tesseract-ocr/tesseract>`_ @@ -176,5 +177,5 @@ Keywords: python-tesseract OCR Python Platform: UNKNOWN Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytesseract-0.2.6/setup.py new/pytesseract-0.2.7/setup.py --- old/pytesseract-0.2.6/setup.py 2018-12-16 03:37:30.000000000 +0100 +++ new/pytesseract-0.2.7/setup.py 2019-06-19 04:40:00.000000000 +0200 @@ -14,7 +14,7 @@ setup( name=PACKAGE_NAME, - version='0.2.6', + version='0.2.7', author='Samuel Hoffstaetter', author_email='sam...@hoffstaetter.com', maintainer='Matthias Lee', @@ -35,7 +35,7 @@ }, classifiers=[ 'Programming Language :: Python', - 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', ] ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pytesseract-0.2.6/src/pytesseract.py new/pytesseract-0.2.7/src/pytesseract.py --- old/pytesseract-0.2.6/src/pytesseract.py 2018-12-16 03:37:30.000000000 +0100 +++ new/pytesseract-0.2.7/src/pytesseract.py 2019-06-19 04:40:00.000000000 +0200 @@ -145,13 +145,13 @@ 'stdin': subprocess.PIPE, 'stderr': subprocess.PIPE, 'startupinfo': None, - 'env': None + 'env': os.environ } if hasattr(subprocess, 'STARTUPINFO'): kwargs['startupinfo'] = subprocess.STARTUPINFO() kwargs['startupinfo'].dwFlags |= subprocess.STARTF_USESHOWWINDOW - kwargs['env'] = os.environ + kwargs['startupinfo'].wShowWindow = subprocess.SW_HIDE if include_stdout: kwargs['stdout'] = subprocess.PIPE @@ -177,7 +177,7 @@ cmd_args += shlex.split(config) - if extension not in ('box', 'osd', 'tsv'): + if extension not in {'box', 'osd', 'tsv'}: cmd_args.append(extension) try: @@ -185,15 +185,15 @@ except OSError: raise TesseractNotFoundError() - status_code, error_string = proc.wait(), proc.stderr.read() - proc.stdin.close() - proc.stdout.close() - proc.stderr.close() - - if status_code: - raise TesseractError(status_code, get_errors(error_string)) + try: + _, error_string = proc.communicate() + finally: + proc.stdin.close() + proc.stdout.close() + proc.stderr.close() - return True + if proc.returncode: + raise TesseractError(proc.returncode, get_errors(error_string)) def run_and_get_output(image, @@ -318,7 +318,7 @@ Returns the result of a Tesseract OCR run on the provided image to pdf/hocr ''' - if extension not in ['pdf', 'hocr']: + if extension not in {'pdf', 'hocr'}: raise ValueError('Unsupported extension: {}'.format(extension)) args = [image, extension, lang, config, nice, True]