Source: pillow, img2pdf
Control: found -1 pillow/9.0.0-1
Control: found -1 img2pdf/0.4.2-1
Severity: serious
Tags: sid bookworm
X-Debbugs-CC: debian...@lists.debian.org
User: debian...@lists.debian.org
Usertags: breaks needs-update
Dear maintainer(s),

With a recent upload of pillow the autopkgtest of img2pdf fails in testing when that autopkgtest is run with the binary packages of pillow from unstable. It passes when run with only packages from testing. In tabular form:
                       pass            fail
pillow                 from testing    9.0.0-1
img2pdf                from testing    0.4.2-1
all others             from testing    from testing

I copied some of the output at the bottom of this report.

Currently this regression is blocking the migration of pillow to testing [1]. Due to the nature of this issue, I filed this bug report against both packages. Can you please investigate the situation and reassign the bug to the right package?
More information about this bug and the reason for filing it can be found on
https://wiki.debian.org/ContinuousIntegration/RegressionEmailInformation

Paul

[1] https://qa.debian.org/excuses.php?package=pillow

https://ci.debian.net/data/autopkgtest/testing/amd64/i/img2pdf/18501325/log.gz

============================= test session starts ==============================
platform linux -- Python 3.9.9, pytest-6.2.5, py-1.10.0, pluggy-0.13.0
rootdir: /tmp/autopkgtest-lxc.e_87xohl/downtmp/build.JA2/src
collected 301 items

src/img2pdf_test.py .................................................... [ 17%]
EE...................................................................... [ 41%]
........................................................................ [ 65%]
........................................................................ [ 89%]
.....................FF.......... [100%]
==================================== ERRORS ====================================
________________ ERROR at setup of test_gif_animation[internal] ________________
tmp_path_factory = TempPathFactory(_given_basetemp=None, _trace=<pluggy._tracing.TagTracerSub object at 0x7feb26630fa0>, _basetemp=PosixPath('/tmp/pytest-of-debci/pytest-0')) gif_animation_img = PosixPath('/tmp/pytest-of-debci/pytest-0/gif_animation_img0/in.gif') request = <SubRequest 'gif_animation_pdf' for <Function test_gif_animation[internal]>>
@pytest.fixture(scope="session", params=["internal", "pikepdf"]) def gif_animation_pdf(tmp_path_factory, gif_animation_img, request): tmpdir = tmp_path_factory.mktemp("gif_animation_pdf") out_pdf = tmpdir / "out.pdf" subprocess.check_call( [ img2pdfprog, "--producer=", "--nodate", "--engine=" + request.param, "--output=" + str(out_pdf), str(gif_animation_img), ] ) pdfinfo = subprocess.check_output(["pdfinfo", str(out_pdf)]) assert re.search( "^Pages: +2$", pdfinfo.decode("utf8"), re.MULTILINE ), identify.decode("utf8")subprocess.check_call(["pdfseparate", str(out_pdf), str(tmpdir / "page-%d.pdf")])
for page in [1, 2]: gif_animation_pdf_nr = tmpdir / ("page-%d.pdf" % page) with pikepdf.open(gif_animation_pdf_nr) as p: assert ( p.pages[0].Contents.read_bytes()== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
)assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/Indexed"
E TypeError: object is not an array

src/img2pdf_test.py:4697: TypeError
________________ ERROR at setup of test_gif_animation[pikepdf] _________________
tmp_path_factory = TempPathFactory(_given_basetemp=None, _trace=<pluggy._tracing.TagTracerSub object at 0x7feb26630fa0>, _basetemp=PosixPath('/tmp/pytest-of-debci/pytest-0')) gif_animation_img = PosixPath('/tmp/pytest-of-debci/pytest-0/gif_animation_img0/in.gif') request = <SubRequest 'gif_animation_pdf' for <Function test_gif_animation[pikepdf]>>
@pytest.fixture(scope="session", params=["internal", "pikepdf"]) def gif_animation_pdf(tmp_path_factory, gif_animation_img, request): tmpdir = tmp_path_factory.mktemp("gif_animation_pdf") out_pdf = tmpdir / "out.pdf" subprocess.check_call( [ img2pdfprog, "--producer=", "--nodate", "--engine=" + request.param, "--output=" + str(out_pdf), str(gif_animation_img), ] ) pdfinfo = subprocess.check_output(["pdfinfo", str(out_pdf)]) assert re.search( "^Pages: +2$", pdfinfo.decode("utf8"), re.MULTILINE ), identify.decode("utf8")subprocess.check_call(["pdfseparate", str(out_pdf), str(tmpdir / "page-%d.pdf")])
for page in [1, 2]: gif_animation_pdf_nr = tmpdir / ("page-%d.pdf" % page) with pikepdf.open(gif_animation_pdf_nr) as p: assert ( p.pages[0].Contents.read_bytes()== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
)assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/Indexed"
E TypeError: object is not an array

src/img2pdf_test.py:4697: TypeError
=================================== FAILURES ===================================
_____________________ test_general[animation.gif-internal] _____________________
general_input = 'animation.gif', engine = <Engine.internal: 1> @pytest.mark.parametrize("engine", ["internal", "pikepdf"]) def test_general(general_input, engine):inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)
outputf = os.path.join(os.path.dirname(__file__), "tests", "output", general_input + ".pdf"
) assert os.path.isfile(outputf) f = inputf out = outputf engine = getattr(img2pdf.Engine, engine) with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, engine=engine) x = pikepdf.open(BytesIO(output)) assert x.Root.Pages.Count in (1, 2) if len(x.Root.Pages.Kids) == "1": assert x.Size == "7" assert len(x.Root.Pages.Kids) == 1 elif len(x.Root.Pages.Kids) == "2": assert x.Size == "10" assert len(x.Root.Pages.Kids) == 2 assert sorted(x.Root.keys()) == ["/Pages", "/Type"] assert x.Root.Type == "/Catalog" assert sorted(x.Root.Pages.keys()) == ["/Count", "/Kids", "/Type"] assert x.Root.Pages.Type == "/Pages" orig_img = Image.open(f) for pagenum in range(len(x.Root.Pages.Kids)): # retrieve the original image frame that this page was # generated from orig_img.seek(pagenum) cur_page = x.Root.Pages.Kids[pagenum] ndpi = orig_img.info.get("dpi", (96.0, 96.0)) # In python3, the returned dpi value for some tiff images will # not be an integer but a float. To make the behaviour of # img2pdf the same between python2 and python3, we convert that # float into an integer by rounding. # Search online for the 72.009 dpi problem for more info. ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) imgwidthpx, imgheightpx = orig_img.size pagewidth = 72.0 * imgwidthpx / ndpi[0] pageheight = 72.0 * imgheightpx / ndpi[1] def format_float(f): if int(f) == f: return int(f) else: return decimal.Decimal("%.4f" % f) assert sorted(cur_page.keys()) == [ "/Contents", "/MediaBox", "/Parent", "/Resources", "/Type", ] assert cur_page.MediaBox == pikepdf.Array( [0, 0, format_float(pagewidth), format_float(pageheight)] ) assert cur_page.Parent == x.Root.Pages assert cur_page.Type == "/Page" assert cur_page.Resources.keys() == {"/XObject"} assert cur_page.Resources.XObject.keys() == {"/Im0"} if engine != img2pdf.Engine.pikepdf:assert cur_page.Contents.Length == len(cur_page.Contents.read_bytes())
assert ( cur_page.Contents.read_bytes() == b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n/Im0 Do\nQ" % ( pagewidth, pageheight, ) ) imgprops = cur_page.Resources.XObject.Im0 # test if the filter is valid: assert imgprops.Filter in [ "/DCTDecode", "/JPXDecode", "/FlateDecode", pikepdf.Array([pikepdf.Name.CCITTFaxDecode]), ] # test if the image has correct size assert imgprops.Width == orig_img.size[0] assert imgprops.Height == orig_img.size[1] # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):
tiff_header = tiff_header_for_ccitt(int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4
) imgio = BytesIO() imgio.write(tiff_header) imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes()) imgio.seek(0) im = Image.open(imgio) assert im.tobytes() == orig_img.tobytes() try: im.close() except AttributeError: pass elif imgprops.Filter == "/FlateDecode": # otherwise, the data is flate encoded and has to be equal # to the pixel data of the input imageimgdata = zlib.decompress(cur_page.Resources.XObject.Im0.read_raw_bytes())
if hasattr(imgprops, "DecodeParms"): if orig_img.format == "PNG": pngidat, palette = img2pdf.parse_png(orig_imgdata) elif ( orig_img.format == "TIFF" and orig_img.info["compression"] == "group4" ):offset, length = img2pdf.ccitt_payload_location_from_pil(orig_img)
pngidat = orig_imgdata[offset : offset + length] else: pngbuffer = BytesIO() orig_img.save(pngbuffer, format="png")pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue())
assert zlib.decompress(pngidat) == imgdata else: colorspace = imgprops.ColorSpace if colorspace == "/DeviceGray": colorspace = "L" elif colorspace == "/DeviceRGB": colorspace = "RGB" elif colorspace == "/DeviceCMYK": colorspace = "CMYK" else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata
) if orig_img.mode == "1":assert im.tobytes() == orig_img.convert("L").tobytes() elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): assert im.tobytes() == orig_img.convert("RGB").tobytes()
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does # not have the close() method try: im.close() except AttributeError: pass else: raise Exception("unknown filter") def rec(obj): if isinstance(obj, pikepdf.Dictionary):return {k: rec(v) for k, v in obj.items() if k != "/Parent"}
elif isinstance(obj, pikepdf.Array): return [rec(v) for v in obj] elif isinstance(obj, pikepdf.Stream): ret = rec(obj.stream_dict) stream = obj.read_raw_bytes() assert len(stream) == ret["/Length"] del ret["/Length"] if ret.get("/Filter") == "/FlateDecode": stream = obj.read_bytes() del ret["/Filter"] ret["stream"] = stream return retelif isinstance(obj, pikepdf.Name) or isinstance(obj, pikepdf.String):
return str(obj) elif isinstance(obj, decimal.Decimal) or isinstance(obj, str): return obj elif isinstance(obj, int): return decimal.Decimal(obj) raise Exception("unhandled: %s" % (type(obj))) y = pikepdf.open(out) pydictx = rec(x.Root) pydicty = rec(y.Root) if f.endswith(os.path.sep + "animation.gif"):# starting with PIL 8.2.0 the palette is half the size when encoding
# our test GIF image as PNG ## to still compare successfully, we truncate the expected palette
import PIL if PIL.__version__ >= "8.2.0": assert len(pydictx["/Pages"]["/Kids"]) == 2 for p in pydictx["/Pages"]["/Kids"]:
assert p["/Resources"]["/XObject"]["/Im0"]["/ColorSpace"][2] == 127
E AssertionError: assert 'e' == 127

src/img2pdf_test.py:6725: AssertionError
_____________________ test_general[animation.gif-pikepdf] ______________________
general_input = 'animation.gif', engine = <Engine.pikepdf: 3> @pytest.mark.parametrize("engine", ["internal", "pikepdf"]) def test_general(general_input, engine):inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)
outputf = os.path.join(os.path.dirname(__file__), "tests", "output", general_input + ".pdf"
) assert os.path.isfile(outputf) f = inputf out = outputf engine = getattr(img2pdf.Engine, engine) with open(f, "rb") as inf: orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, nodate=True, engine=engine) x = pikepdf.open(BytesIO(output)) assert x.Root.Pages.Count in (1, 2) if len(x.Root.Pages.Kids) == "1": assert x.Size == "7" assert len(x.Root.Pages.Kids) == 1 elif len(x.Root.Pages.Kids) == "2": assert x.Size == "10" assert len(x.Root.Pages.Kids) == 2 assert sorted(x.Root.keys()) == ["/Pages", "/Type"] assert x.Root.Type == "/Catalog" assert sorted(x.Root.Pages.keys()) == ["/Count", "/Kids", "/Type"] assert x.Root.Pages.Type == "/Pages" orig_img = Image.open(f) for pagenum in range(len(x.Root.Pages.Kids)): # retrieve the original image frame that this page was # generated from orig_img.seek(pagenum) cur_page = x.Root.Pages.Kids[pagenum] ndpi = orig_img.info.get("dpi", (96.0, 96.0)) # In python3, the returned dpi value for some tiff images will # not be an integer but a float. To make the behaviour of # img2pdf the same between python2 and python3, we convert that # float into an integer by rounding. # Search online for the 72.009 dpi problem for more info. ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) imgwidthpx, imgheightpx = orig_img.size pagewidth = 72.0 * imgwidthpx / ndpi[0] pageheight = 72.0 * imgheightpx / ndpi[1] def format_float(f): if int(f) == f: return int(f) else: return decimal.Decimal("%.4f" % f) assert sorted(cur_page.keys()) == [ "/Contents", "/MediaBox", "/Parent", "/Resources", "/Type", ] assert cur_page.MediaBox == pikepdf.Array( [0, 0, format_float(pagewidth), format_float(pageheight)] ) assert cur_page.Parent == x.Root.Pages assert cur_page.Type == "/Page" assert cur_page.Resources.keys() == {"/XObject"} assert cur_page.Resources.XObject.keys() == {"/Im0"} if engine != img2pdf.Engine.pikepdf:assert cur_page.Contents.Length == len(cur_page.Contents.read_bytes())
assert ( cur_page.Contents.read_bytes() == b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n/Im0 Do\nQ" % ( pagewidth, pageheight, ) ) imgprops = cur_page.Resources.XObject.Im0 # test if the filter is valid: assert imgprops.Filter in [ "/DCTDecode", "/JPXDecode", "/FlateDecode", pikepdf.Array([pikepdf.Name.CCITTFaxDecode]), ] # test if the image has correct size assert imgprops.Width == orig_img.size[0] assert imgprops.Height == orig_img.size[1] # if the input file is a jpeg then it should've been copied # verbatim into the PDF if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):
tiff_header = tiff_header_for_ccitt(int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4
) imgio = BytesIO() imgio.write(tiff_header) imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes()) imgio.seek(0) im = Image.open(imgio) assert im.tobytes() == orig_img.tobytes() try: im.close() except AttributeError: pass elif imgprops.Filter == "/FlateDecode": # otherwise, the data is flate encoded and has to be equal # to the pixel data of the input imageimgdata = zlib.decompress(cur_page.Resources.XObject.Im0.read_raw_bytes())
if hasattr(imgprops, "DecodeParms"): if orig_img.format == "PNG": pngidat, palette = img2pdf.parse_png(orig_imgdata) elif ( orig_img.format == "TIFF" and orig_img.info["compression"] == "group4" ):offset, length = img2pdf.ccitt_payload_location_from_pil(orig_img)
pngidat = orig_imgdata[offset : offset + length] else: pngbuffer = BytesIO() orig_img.save(pngbuffer, format="png")pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue())
assert zlib.decompress(pngidat) == imgdata else: colorspace = imgprops.ColorSpace if colorspace == "/DeviceGray": colorspace = "L" elif colorspace == "/DeviceRGB": colorspace = "RGB" elif colorspace == "/DeviceCMYK": colorspace = "CMYK" else: raise Exception("invalid colorspace") im = Image.frombytes(colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata
) if orig_img.mode == "1":assert im.tobytes() == orig_img.convert("L").tobytes() elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): assert im.tobytes() == orig_img.convert("RGB").tobytes()
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does # not have the close() method try: im.close() except AttributeError: pass else: raise Exception("unknown filter") def rec(obj): if isinstance(obj, pikepdf.Dictionary):return {k: rec(v) for k, v in obj.items() if k != "/Parent"}
elif isinstance(obj, pikepdf.Array): return [rec(v) for v in obj] elif isinstance(obj, pikepdf.Stream): ret = rec(obj.stream_dict) stream = obj.read_raw_bytes() assert len(stream) == ret["/Length"] del ret["/Length"] if ret.get("/Filter") == "/FlateDecode": stream = obj.read_bytes() del ret["/Filter"] ret["stream"] = stream return retelif isinstance(obj, pikepdf.Name) or isinstance(obj, pikepdf.String):
return str(obj) elif isinstance(obj, decimal.Decimal) or isinstance(obj, str): return obj elif isinstance(obj, int): return decimal.Decimal(obj) raise Exception("unhandled: %s" % (type(obj))) y = pikepdf.open(out) pydictx = rec(x.Root) pydicty = rec(y.Root) if f.endswith(os.path.sep + "animation.gif"):# starting with PIL 8.2.0 the palette is half the size when encoding
# our test GIF image as PNG ## to still compare successfully, we truncate the expected palette
import PIL if PIL.__version__ >= "8.2.0": assert len(pydictx["/Pages"]["/Kids"]) == 2 for p in pydictx["/Pages"]["/Kids"]:
assert p["/Resources"]["/XObject"]["/Im0"]["/ColorSpace"][2] == 127
E AssertionError: assert 'e' == 127

src/img2pdf_test.py:6725: AssertionError
=========================== short test summary info ============================
FAILED src/img2pdf_test.py::test_general[animation.gif-internal] - AssertionE...
FAILED src/img2pdf_test.py::test_general[animation.gif-pikepdf] - AssertionEr...
ERROR src/img2pdf_test.py::test_gif_animation[internal] - TypeError: object i...
ERROR src/img2pdf_test.py::test_gif_animation[pikepdf] - TypeError: object is...
=================== 2 failed, 297 passed, 2 errors in 58.18s ===================
autopkgtest [00:14:31]: test pytest
OpenPGP_signature
Description: OpenPGP digital signature