Bug#1004087: pillow breaks img2pdf autopkgtest: TypeError: object is not an array

Paul Gevers Thu, 20 Jan 2022 08:24:21 -0800

Source: pillow, img2pdf
Control: found -1 pillow/9.0.0-1
Control: found -1 img2pdf/0.4.2-1
Severity: serious
Tags: sid bookworm
X-Debbugs-CC: debian...@lists.debian.org
User: debian...@lists.debian.org
Usertags: breaks needs-update


Dear maintainer(s),

With a recent upload of pillow, the autopkgtest of img2pdf fails in testing when that autopkgtest is run with the binary packages of pillow from unstable. It passes when run with only packages from testing. In tabular form:


                       pass            fail
pillow                 from testing    9.0.0-1
img2pdf                from testing    0.4.2-1
all others             from testing    from testing

I copied some of the output at the bottom of this report.

Currently this regression is blocking the migration of pillow to testing [1]. Due to the nature of this issue, I filed this bug report against both packages. Can you please investigate the situation and reassign the bug to the right package?


More information about this bug and the reason for filing it can be found on
https://wiki.debian.org/ContinuousIntegration/RegressionEmailInformation

Paul

[1] https://qa.debian.org/excuses.php?package=pillow

https://ci.debian.net/data/autopkgtest/testing/amd64/i/img2pdf/18501325/log.gz

============================= test session starts ==============================

platform linux -- Python 3.9.9, pytest-6.2.5, py-1.10.0, pluggy-0.13.0
rootdir: /tmp/autopkgtest-lxc.e_87xohl/downtmp/build.JA2/src
collected 301 items

src/img2pdf_test.py .................................................... [ 17%]
EE...................................................................... [ 41%]
........................................................................ [ 65%]
........................................................................ [ 89%]
.....................FF.......... [100%]

==================================== ERRORS ====================================
________________ ERROR at setup of test_gif_animation[internal] ________________

tmp_path_factory = TempPathFactory(_given_basetemp=None, _trace=<pluggy._tracing.TagTracerSub object at 0x7feb26630fa0>, _basetemp=PosixPath('/tmp/pytest-of-debci/pytest-0'))
gif_animation_img = PosixPath('/tmp/pytest-of-debci/pytest-0/gif_animation_img0/in.gif')
request = <SubRequest 'gif_animation_pdf' for <Function test_gif_animation[internal]>>


    @pytest.fixture(scope="session", params=["internal", "pikepdf"])
    def gif_animation_pdf(tmp_path_factory, gif_animation_img, request):
        tmpdir = tmp_path_factory.mktemp("gif_animation_pdf")
        out_pdf = tmpdir / "out.pdf"
        subprocess.check_call(
            [
                img2pdfprog,
                "--producer=",
                "--nodate",
                "--engine=" + request.param,
                "--output=" + str(out_pdf),
                str(gif_animation_img),
            ]
        )
        pdfinfo = subprocess.check_output(["pdfinfo", str(out_pdf)])
        assert re.search(
            "^Pages: +2$", pdfinfo.decode("utf8"), re.MULTILINE
        ), identify.decode("utf8")

subprocess.check_call(["pdfseparate", str(out_pdf), str(tmpdir / "page-%d.pdf")])

        for page in [1, 2]:
            gif_animation_pdf_nr = tmpdir / ("page-%d.pdf" % page)
            with pikepdf.open(gif_animation_pdf_nr) as p:
                assert (
                    p.pages[0].Contents.read_bytes()

== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"

assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8

              assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == 
"/Indexed"

E               TypeError: object is not an array

src/img2pdf_test.py:4697: TypeError

________________ ERROR at setup of test_gif_animation[pikepdf] _________________


    @pytest.fixture(scope="session", params=["internal", "pikepdf"])
    def gif_animation_pdf(tmp_path_factory, gif_animation_img, request):
        tmpdir = tmp_path_factory.mktemp("gif_animation_pdf")
        out_pdf = tmpdir / "out.pdf"
        subprocess.check_call(
            [
                img2pdfprog,
                "--producer=",
                "--nodate",
                "--engine=" + request.param,
                "--output=" + str(out_pdf),
                str(gif_animation_img),
            ]
        )
        pdfinfo = subprocess.check_output(["pdfinfo", str(out_pdf)])
        assert re.search(
            "^Pages: +2$", pdfinfo.decode("utf8"), re.MULTILINE
        ), identify.decode("utf8")

subprocess.check_call(["pdfseparate", str(out_pdf), str(tmpdir / "page-%d.pdf")])

        for page in [1, 2]:
            gif_animation_pdf_nr = tmpdir / ("page-%d.pdf" % page)
            with pikepdf.open(gif_animation_pdf_nr) as p:
                assert (
                    p.pages[0].Contents.read_bytes()

== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"

assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8

              assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == 
"/Indexed"

E               TypeError: object is not an array

src/img2pdf_test.py:4697: TypeError

=================================== FAILURES ===================================
_____________________ test_general[animation.gif-internal] _____________________


general_input = 'animation.gif', engine = <Engine.internal: 1>

    @pytest.mark.parametrize("engine", ["internal", "pikepdf"])
    def test_general(general_input, engine):

inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)

        outputf = os.path.join(

os.path.dirname(__file__), "tests", "output", general_input + ".pdf"

        )
        assert os.path.isfile(outputf)
        f = inputf
        out = outputf
            engine = getattr(img2pdf.Engine, engine)
            with open(f, "rb") as inf:
            orig_imgdata = inf.read()
        output = img2pdf.convert(orig_imgdata, nodate=True, engine=engine)
        x = pikepdf.open(BytesIO(output))
        assert x.Root.Pages.Count in (1, 2)
        if len(x.Root.Pages.Kids) == "1":
            assert x.Size == "7"
            assert len(x.Root.Pages.Kids) == 1
        elif len(x.Root.Pages.Kids) == "2":
            assert x.Size == "10"
            assert len(x.Root.Pages.Kids) == 2
        assert sorted(x.Root.keys()) == ["/Pages", "/Type"]
        assert x.Root.Type == "/Catalog"
        assert sorted(x.Root.Pages.keys()) == ["/Count", "/Kids", "/Type"]
        assert x.Root.Pages.Type == "/Pages"
        orig_img = Image.open(f)
        for pagenum in range(len(x.Root.Pages.Kids)):
            # retrieve the original image frame that this page was
            # generated from
            orig_img.seek(pagenum)
            cur_page = x.Root.Pages.Kids[pagenum]
                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
            # In python3, the returned dpi value for some tiff images will
            # not be an integer but a float. To make the behaviour of
            # img2pdf the same between python2 and python3, we convert that
            # float into an integer by rounding.
            # Search online for the 72.009 dpi problem for more info.
            ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
            imgwidthpx, imgheightpx = orig_img.size
            pagewidth = 72.0 * imgwidthpx / ndpi[0]
            pageheight = 72.0 * imgheightpx / ndpi[1]
                def format_float(f):
                if int(f) == f:
                    return int(f)
                else:
                    return decimal.Decimal("%.4f" % f)
                assert sorted(cur_page.keys()) == [
                "/Contents",
                "/MediaBox",
                "/Parent",
                "/Resources",
                "/Type",
            ]
            assert cur_page.MediaBox == pikepdf.Array(
                [0, 0, format_float(pagewidth), format_float(pageheight)]
            )
            assert cur_page.Parent == x.Root.Pages
            assert cur_page.Type == "/Page"
            assert cur_page.Resources.keys() == {"/XObject"}
            assert cur_page.Resources.XObject.keys() == {"/Im0"}
            if engine != img2pdf.Engine.pikepdf:

assert cur_page.Contents.Length == len(cur_page.Contents.read_bytes())

            assert (
                cur_page.Contents.read_bytes()
                == b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n/Im0 Do\nQ"
                % (
                    pagewidth,
                    pageheight,
                )
            )
                imgprops = cur_page.Resources.XObject.Im0
                # test if the filter is valid:
            assert imgprops.Filter in [
                "/DCTDecode",
                "/JPXDecode",
                "/FlateDecode",
                pikepdf.Array([pikepdf.Name.CCITTFaxDecode]),
            ]
                # test if the image has correct size
            assert imgprops.Width == orig_img.size[0]
            assert imgprops.Height == orig_img.size[1]
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:

assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):

                tiff_header = tiff_header_for_ccitt(

int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4

                )
                imgio = BytesIO()
                imgio.write(tiff_header)

imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes())
                imgio.seek(0)
                im = Image.open(imgio)
                assert im.tobytes() == orig_img.tobytes()
                try:
                    im.close()
                except AttributeError:
                    pass
            elif imgprops.Filter == "/FlateDecode":
                # otherwise, the data is flate encoded and has to be equal
                # to the pixel data of the input image

imgdata = zlib.decompress(cur_page.Resources.XObject.Im0.read_raw_bytes())

                if hasattr(imgprops, "DecodeParms"):
                    if orig_img.format == "PNG":
                        pngidat, palette = img2pdf.parse_png(orig_imgdata)
                    elif (
                        orig_img.format == "TIFF"
                        and orig_img.info["compression"] == "group4"
                    ):

offset, length = img2pdf.ccitt_payload_location_from_pil(orig_img)

                        pngidat = orig_imgdata[offset : offset + length]
                    else:
                        pngbuffer = BytesIO()
                        orig_img.save(pngbuffer, format="png")

pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue())

                    assert zlib.decompress(pngidat) == imgdata
                else:
                    colorspace = imgprops.ColorSpace
                    if colorspace == "/DeviceGray":
                        colorspace = "L"
                    elif colorspace == "/DeviceRGB":
                        colorspace = "RGB"
                    elif colorspace == "/DeviceCMYK":
                        colorspace = "CMYK"
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(

colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata

                    )
                    if orig_img.mode == "1":

assert im.tobytes() == orig_img.convert("L").tobytes() elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): assert im.tobytes() == orig_img.convert("RGB").tobytes()

                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                    # not have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            else:
                raise Exception("unknown filter")
            def rec(obj):
            if isinstance(obj, pikepdf.Dictionary):

return {k: rec(v) for k, v in obj.items() if k != "/Parent"}

            elif isinstance(obj, pikepdf.Array):
                return [rec(v) for v in obj]
            elif isinstance(obj, pikepdf.Stream):
                ret = rec(obj.stream_dict)
                stream = obj.read_raw_bytes()
                assert len(stream) == ret["/Length"]
                del ret["/Length"]
                if ret.get("/Filter") == "/FlateDecode":
                    stream = obj.read_bytes()
                    del ret["/Filter"]
                ret["stream"] = stream
                return ret

elif isinstance(obj, pikepdf.Name) or isinstance(obj, pikepdf.String):

                return str(obj)
            elif isinstance(obj, decimal.Decimal) or isinstance(obj, str):
                return obj
            elif isinstance(obj, int):
                return decimal.Decimal(obj)
            raise Exception("unhandled: %s" % (type(obj)))
            y = pikepdf.open(out)
        pydictx = rec(x.Root)
        pydicty = rec(y.Root)
        if f.endswith(os.path.sep + "animation.gif"):

# starting with PIL 8.2.0 the palette is half the size when encoding

            # our test GIF image as PNG
            #

# to still compare successfully, we truncate the expected palette

            import PIL
                if PIL.__version__ >= "8.2.0":
                assert len(pydictx["/Pages"]["/Kids"]) == 2
                for p in pydictx["/Pages"]["/Kids"]:

                  assert p["/Resources"]["/XObject"]["/Im0"]["/ColorSpace"][2] 
== 127

E                   AssertionError: assert 'e' == 127

src/img2pdf_test.py:6725: AssertionError

_____________________ test_general[animation.gif-pikepdf] ______________________


general_input = 'animation.gif', engine = <Engine.pikepdf: 3>

    @pytest.mark.parametrize("engine", ["internal", "pikepdf"])
    def test_general(general_input, engine):

inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)

        outputf = os.path.join(

os.path.dirname(__file__), "tests", "output", general_input + ".pdf"

        )
        assert os.path.isfile(outputf)
        f = inputf
        out = outputf
            engine = getattr(img2pdf.Engine, engine)
            with open(f, "rb") as inf:
            orig_imgdata = inf.read()
        output = img2pdf.convert(orig_imgdata, nodate=True, engine=engine)
        x = pikepdf.open(BytesIO(output))
        assert x.Root.Pages.Count in (1, 2)
        if len(x.Root.Pages.Kids) == "1":
            assert x.Size == "7"
            assert len(x.Root.Pages.Kids) == 1
        elif len(x.Root.Pages.Kids) == "2":
            assert x.Size == "10"
            assert len(x.Root.Pages.Kids) == 2
        assert sorted(x.Root.keys()) == ["/Pages", "/Type"]
        assert x.Root.Type == "/Catalog"
        assert sorted(x.Root.Pages.keys()) == ["/Count", "/Kids", "/Type"]
        assert x.Root.Pages.Type == "/Pages"
        orig_img = Image.open(f)
        for pagenum in range(len(x.Root.Pages.Kids)):
            # retrieve the original image frame that this page was
            # generated from
            orig_img.seek(pagenum)
            cur_page = x.Root.Pages.Kids[pagenum]
                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
            # In python3, the returned dpi value for some tiff images will
            # not be an integer but a float. To make the behaviour of
            # img2pdf the same between python2 and python3, we convert that
            # float into an integer by rounding.
            # Search online for the 72.009 dpi problem for more info.
            ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
            imgwidthpx, imgheightpx = orig_img.size
            pagewidth = 72.0 * imgwidthpx / ndpi[0]
            pageheight = 72.0 * imgheightpx / ndpi[1]
                def format_float(f):
                if int(f) == f:
                    return int(f)
                else:
                    return decimal.Decimal("%.4f" % f)
                assert sorted(cur_page.keys()) == [
                "/Contents",
                "/MediaBox",
                "/Parent",
                "/Resources",
                "/Type",
            ]
            assert cur_page.MediaBox == pikepdf.Array(
                [0, 0, format_float(pagewidth), format_float(pageheight)]
            )
            assert cur_page.Parent == x.Root.Pages
            assert cur_page.Type == "/Page"
            assert cur_page.Resources.keys() == {"/XObject"}
            assert cur_page.Resources.XObject.keys() == {"/Im0"}
            if engine != img2pdf.Engine.pikepdf:

assert cur_page.Contents.Length == len(cur_page.Contents.read_bytes())

            assert (
                cur_page.Contents.read_bytes()
                == b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n/Im0 Do\nQ"
                % (
                    pagewidth,
                    pageheight,
                )
            )
                imgprops = cur_page.Resources.XObject.Im0
                # test if the filter is valid:
            assert imgprops.Filter in [
                "/DCTDecode",
                "/JPXDecode",
                "/FlateDecode",
                pikepdf.Array([pikepdf.Name.CCITTFaxDecode]),
            ]
                # test if the image has correct size
            assert imgprops.Width == orig_img.size[0]
            assert imgprops.Height == orig_img.size[1]
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:

assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):

                tiff_header = tiff_header_for_ccitt(

int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4

                )
                imgio = BytesIO()
                imgio.write(tiff_header)

imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes())
                imgio.seek(0)
                im = Image.open(imgio)
                assert im.tobytes() == orig_img.tobytes()
                try:
                    im.close()
                except AttributeError:
                    pass
            elif imgprops.Filter == "/FlateDecode":
                # otherwise, the data is flate encoded and has to be equal
                # to the pixel data of the input image

imgdata = zlib.decompress(cur_page.Resources.XObject.Im0.read_raw_bytes())

                if hasattr(imgprops, "DecodeParms"):
                    if orig_img.format == "PNG":
                        pngidat, palette = img2pdf.parse_png(orig_imgdata)
                    elif (
                        orig_img.format == "TIFF"
                        and orig_img.info["compression"] == "group4"
                    ):

offset, length = img2pdf.ccitt_payload_location_from_pil(orig_img)

                        pngidat = orig_imgdata[offset : offset + length]
                    else:
                        pngbuffer = BytesIO()
                        orig_img.save(pngbuffer, format="png")

pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue())

                    assert zlib.decompress(pngidat) == imgdata
                else:
                    colorspace = imgprops.ColorSpace
                    if colorspace == "/DeviceGray":
                        colorspace = "L"
                    elif colorspace == "/DeviceRGB":
                        colorspace = "RGB"
                    elif colorspace == "/DeviceCMYK":
                        colorspace = "CMYK"
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(

colorspace, (int(imgprops.Width), int(imgprops.Height)), imgdata

                    )
                    if orig_img.mode == "1":

assert im.tobytes() == orig_img.convert("L").tobytes() elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): assert im.tobytes() == orig_img.convert("RGB").tobytes()

                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                    # not have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            else:
                raise Exception("unknown filter")
            def rec(obj):
            if isinstance(obj, pikepdf.Dictionary):

return {k: rec(v) for k, v in obj.items() if k != "/Parent"}

            elif isinstance(obj, pikepdf.Array):
                return [rec(v) for v in obj]
            elif isinstance(obj, pikepdf.Stream):
                ret = rec(obj.stream_dict)
                stream = obj.read_raw_bytes()
                assert len(stream) == ret["/Length"]
                del ret["/Length"]
                if ret.get("/Filter") == "/FlateDecode":
                    stream = obj.read_bytes()
                    del ret["/Filter"]
                ret["stream"] = stream
                return ret

elif isinstance(obj, pikepdf.Name) or isinstance(obj, pikepdf.String):

                return str(obj)
            elif isinstance(obj, decimal.Decimal) or isinstance(obj, str):
                return obj
            elif isinstance(obj, int):
                return decimal.Decimal(obj)
            raise Exception("unhandled: %s" % (type(obj)))
            y = pikepdf.open(out)
        pydictx = rec(x.Root)
        pydicty = rec(y.Root)
        if f.endswith(os.path.sep + "animation.gif"):

# starting with PIL 8.2.0 the palette is half the size when encoding

            # our test GIF image as PNG
            #

# to still compare successfully, we truncate the expected palette

            import PIL
                if PIL.__version__ >= "8.2.0":
                assert len(pydictx["/Pages"]["/Kids"]) == 2
                for p in pydictx["/Pages"]["/Kids"]:

                  assert p["/Resources"]["/XObject"]["/Im0"]["/ColorSpace"][2] 
== 127

E                   AssertionError: assert 'e' == 127

src/img2pdf_test.py:6725: AssertionError

=========================== short test summary info ============================
FAILED src/img2pdf_test.py::test_general[animation.gif-internal] - AssertionE...
FAILED src/img2pdf_test.py::test_general[animation.gif-pikepdf] - AssertionEr...
ERROR src/img2pdf_test.py::test_gif_animation[internal] - TypeError: object i...
ERROR src/img2pdf_test.py::test_gif_animation[pikepdf] - TypeError: object is...
=================== 2 failed, 297 passed, 2 errors in 58.18s ===================

autopkgtest [00:14:31]: test pytest

OpenPGP_signature
Description: OpenPGP digital signature

Bug#1004087: pillow breaks img2pdf autopkgtest: TypeError: object is not an array

Reply via email to