Source: ocrmypdf Version: 8.0.1+dfsg-1 Severity: serious Tags: sid bullseye User: debian...@lists.debian.org Usertags: needs-update Control: affects -1 src:pikepdf Control: affects -1 src:ghostscript Control: affects -1 src:pytest
[X-Debbugs-CC: debian...@lists.debian.org, pike...@packages.debian.org, ghostscr...@packages.debian.org, pyt...@packages.debian.org] Dear maintainers, With a recent upload of pikepdf and with a recent upload of ghostscript and with a recent upload of pytest (although that pulls in the others) the autopkgtest of ocrmypdf fails in testing when that autopkgtest is run with the binary packages of those packages from unstable. It passes when run with only packages from testing. In tabular form, e.g.: pass fail pikepdf from testing 1.6.1+dfsg-1 ocrmypdf from testing 8.0.1+dfsg-1 all others from testing from testing I copied some of the output at the bottom of this report. Currently this regression is blocking the migration of pikepdf, ghostscript and pytest to testing [1]. Because failure is triggered by two packages separately, I filed the bug against ocrmypdf, please reassign (and clone) if that wasn't correct. More information about this bug and the reason for filing it can be found on https://wiki.debian.org/ContinuousIntegration/RegressionEmailInformation Paul [1] https://qa.debian.org/excuses.php?package=pikepdf https://ci.debian.net/data/autopkgtest/testing/amd64/o/ocrmypdf/2854254/log.gz =================================== FAILURES =================================== _______________________ test_non_square_resolution[hocr] _______________________ renderer = 'hocr' spoof_tesseract_cache = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} resources = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources') outpdf = '/tmp/pytest-of-debci/pytest-0/test_non_square_resolution_hoc0/out.pdf' @pytest.mark.parametrize('renderer', RENDERERS) def test_non_square_resolution(renderer, spoof_tesseract_cache, resources, outpdf): # Confirm input image is non-square 
resolution in_pageinfo = PdfInfo(resources / 'aspect.pdf') assert in_pageinfo[0].xres != in_pageinfo[0].yres check_ocrmypdf( resources / 'aspect.pdf', outpdf, '--pdf-renderer', renderer, > env=spoof_tesseract_cache, ) tests/test_main.py:481: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ input_file = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources/aspect.pdf') output_file = '/tmp/pytest-of-debci/pytest-0/test_non_square_resolution_hoc0/out.pdf' env = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} args = ('--pdf-renderer', 'hocr') p = <subprocess.Popen object at 0x7f12ee1bcb90>, out = '' @pytest.helpers.register def check_ocrmypdf(input_file, output_file, *args, env=None): "Run ocrmypdf and confirmed that a valid file was created" p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env) # ensure py.test collects the output, use -s to view print(err, file=sys.stderr) > assert p.returncode == 0 E assert 15 == 0 E + where 15 = <subprocess.Popen object at 0x7f12ee1bcb90>.returncode tests/conftest.py:155: AssertionError ----------------------------- Captured stderr call ----------------------------- INFO - 1: [tesseract] Tesseract cache folder /tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/cache/aspect/__-l__eng__000001.ocr.png__000001__hocr__txt - HIT ERROR - Traceback (most recent call last): File "/usr/lib/python3/dist-packages/ruffus/task.py", line 712, in run_pooled_job_without_exceptions register_cleanup, touch_files_only) File "/usr/lib/python3/dist-packages/ruffus/task.py", line 544, in job_wrapper_io_files ret_val = user_defined_work_func(*params) File "/usr/lib/python3/dist-packages/ocrmypdf/_pipeline.py", line 827, in convert_to_pdfa pdf_layers_file.save(layers_file) ValueError: Cannot 
overwrite input file _____________________ test_non_square_resolution[sandwich] _____________________ renderer = 'sandwich' spoof_tesseract_cache = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} resources = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources') outpdf = '/tmp/pytest-of-debci/pytest-0/test_non_square_resolution_san0/out.pdf' @pytest.mark.parametrize('renderer', RENDERERS) def test_non_square_resolution(renderer, spoof_tesseract_cache, resources, outpdf): # Confirm input image is non-square resolution in_pageinfo = PdfInfo(resources / 'aspect.pdf') assert in_pageinfo[0].xres != in_pageinfo[0].yres check_ocrmypdf( resources / 'aspect.pdf', outpdf, '--pdf-renderer', renderer, > env=spoof_tesseract_cache, ) tests/test_main.py:481: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ input_file = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources/aspect.pdf') output_file = '/tmp/pytest-of-debci/pytest-0/test_non_square_resolution_san0/out.pdf' env = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} args = ('--pdf-renderer', 'sandwich') p = <subprocess.Popen object at 0x7f12ee1bcb50>, out = '' @pytest.helpers.register def check_ocrmypdf(input_file, output_file, *args, env=None): "Run ocrmypdf and confirmed that a valid file was created" p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env) # ensure py.test collects the output, use -s to view print(err, file=sys.stderr) > assert p.returncode == 0 E assert 15 == 0 E + where 15 = <subprocess.Popen object at 0x7f12ee1bcb50>.returncode 
tests/conftest.py:155: AssertionError ----------------------------- Captured stderr call ----------------------------- INFO - 1: [tesseract] Tesseract cache folder /tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/cache/aspect/__-l__eng__000001.ocr.png__000001.text__pdf__txt - HIT ERROR - Traceback (most recent call last): File "/usr/lib/python3/dist-packages/ruffus/task.py", line 712, in run_pooled_job_without_exceptions register_cleanup, touch_files_only) File "/usr/lib/python3/dist-packages/ruffus/task.py", line 544, in job_wrapper_io_files ret_val = user_defined_work_func(*params) File "/usr/lib/python3/dist-packages/ocrmypdf/_pipeline.py", line 827, in convert_to_pdfa pdf_layers_file.save(layers_file) ValueError: Cannot overwrite input file ___________________ test_convert_to_square_resolution[hocr] ____________________ renderer = 'hocr' spoof_tesseract_cache = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} resources = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources') outpdf = '/tmp/pytest-of-debci/pytest-0/test_convert_to_square_resolut0/out.pdf' @pytest.mark.parametrize('renderer', RENDERERS) def test_convert_to_square_resolution( renderer, spoof_tesseract_cache, resources, outpdf ): # Confirm input image is non-square resolution in_pageinfo = PdfInfo(resources / 'aspect.pdf') assert in_pageinfo[0].xres != in_pageinfo[0].yres # --force-ocr requires means forced conversion to square resolution check_ocrmypdf( resources / 'aspect.pdf', outpdf, '--force-ocr', '--pdf-renderer', renderer, > env=spoof_tesseract_cache, ) tests/test_main.py:506: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ input_file = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources/aspect.pdf') 
output_file = '/tmp/pytest-of-debci/pytest-0/test_convert_to_square_resolut0/out.pdf' env = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} args = ('--force-ocr', '--pdf-renderer', 'hocr') p = <subprocess.Popen object at 0x7f12ee17c690>, out = '' @pytest.helpers.register def check_ocrmypdf(input_file, output_file, *args, env=None): "Run ocrmypdf and confirmed that a valid file was created" p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env) # ensure py.test collects the output, use -s to view print(err, file=sys.stderr) > assert p.returncode == 0 E assert 15 == 0 E + where 15 = <subprocess.Popen object at 0x7f12ee17c690>.returncode tests/conftest.py:155: AssertionError ----------------------------- Captured stderr call ----------------------------- INFO - 1: [tesseract] Tesseract cache folder /tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/cache/aspect/__-l__eng__000001.ocr.png__000001__hocr__txt - HIT ERROR - Traceback (most recent call last): File "/usr/lib/python3/dist-packages/ruffus/task.py", line 712, in run_pooled_job_without_exceptions register_cleanup, touch_files_only) File "/usr/lib/python3/dist-packages/ruffus/task.py", line 544, in job_wrapper_io_files ret_val = user_defined_work_func(*params) File "/usr/lib/python3/dist-packages/ocrmypdf/_pipeline.py", line 827, in convert_to_pdfa pdf_layers_file.save(layers_file) ValueError: Cannot overwrite input file _________________ test_convert_to_square_resolution[sandwich] __________________ renderer = 'sandwich' spoof_tesseract_cache = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} resources = 
PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources') outpdf = '/tmp/pytest-of-debci/pytest-0/test_convert_to_square_resolut1/out.pdf' @pytest.mark.parametrize('renderer', RENDERERS) def test_convert_to_square_resolution( renderer, spoof_tesseract_cache, resources, outpdf ): # Confirm input image is non-square resolution in_pageinfo = PdfInfo(resources / 'aspect.pdf') assert in_pageinfo[0].xres != in_pageinfo[0].yres # --force-ocr requires means forced conversion to square resolution check_ocrmypdf( resources / 'aspect.pdf', outpdf, '--force-ocr', '--pdf-renderer', renderer, > env=spoof_tesseract_cache, ) tests/test_main.py:506: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ input_file = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources/aspect.pdf') output_file = '/tmp/pytest-of-debci/pytest-0/test_convert_to_square_resolut1/out.pdf' env = {'ADTTMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', 'ADT_ARTIFACTS': '/tmp/autopkgtest-lxc._q0vjo65/do...q0vjo65/downtmp/test-suite-artifacts', 'AUTOPKGTEST_TMP': '/tmp/autopkgtest-lxc._q0vjo65/downtmp/autopkgtest_tmp', ...} args = ('--force-ocr', '--pdf-renderer', 'sandwich') p = <subprocess.Popen object at 0x7f12edfd38d0>, out = '' @pytest.helpers.register def check_ocrmypdf(input_file, output_file, *args, env=None): "Run ocrmypdf and confirmed that a valid file was created" p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env) # ensure py.test collects the output, use -s to view print(err, file=sys.stderr) > assert p.returncode == 0 E assert 15 == 0 E + where 15 = <subprocess.Popen object at 0x7f12edfd38d0>.returncode tests/conftest.py:155: AssertionError ----------------------------- Captured stderr call ----------------------------- INFO - 1: [tesseract] Tesseract cache folder /tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/cache/aspect/__-l__eng__000001.ocr.png__000001.text__pdf__txt - HIT ERROR - 
Traceback (most recent call last): File "/usr/lib/python3/dist-packages/ruffus/task.py", line 712, in run_pooled_job_without_exceptions register_cleanup, touch_files_only) File "/usr/lib/python3/dist-packages/ruffus/task.py", line 544, in job_wrapper_io_files ret_val = user_defined_work_func(*params) File "/usr/lib/python3/dist-packages/ocrmypdf/_pipeline.py", line 827, in convert_to_pdfa pdf_layers_file.save(layers_file) ValueError: Cannot overwrite input file _________________________ test_metadata_fixup_warning __________________________ resources = PosixPath('/tmp/autopkgtest-lxc._q0vjo65/downtmp/build.Oxe/src/tests/resources') outdir = PosixPath('/tmp/pytest-of-debci/pytest-0/test_metadata_fixup_warning0') def test_metadata_fixup_warning(resources, outdir): from ocrmypdf._pipeline import metadata_fixup input_files = [ str(outdir / 'graph.repaired.pdf'), str(outdir / 'layers.rendered.pdf'), str(outdir / 'pdfa.pdf'), # It is okay that this is not a PDF/A ] for f in input_files: copyfile(resources / 'graph.pdf', f) log = MagicMock() context = MagicMock() metadata_fixup( input_files_groups=input_files, output_file=outdir / 'out.pdf', log=log, context=context, ) log.warning.assert_not_called() # Now add some metadata that will not be copyable graph = pikepdf.open(outdir / 'graph.repaired.pdf') with graph.open_metadata() as meta: meta['prism2:publicationName'] = 'OCRmyPDF Test' > graph.save(outdir / 'graph.repaired.pdf') E ValueError: Cannot overwrite input file tests/test_metadata.py:314: ValueError
signature.asc
Description: OpenPGP digital signature