From acc25a49265effbbffa36e053ae2a3aa633eddbf Mon Sep 17 00:00:00 2001 From: Johannes Schauer Marin Rodrigues Date: Sat, 5 Aug 2023 14:59:05 +0200 Subject: [PATCH] Support JPEG2000 images with transparency Closes: #173 --- src/img2pdf.py | 28 +++++--- src/img2pdf_test.py | 157 ++++++++++++++++++++++++++++++++++++++++++++ src/jp2.py | 13 ++-- 3 files changed, 183 insertions(+), 15 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index ae4a189..ebff595 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -827,8 +827,10 @@ class pdfdoc(object): artborder=None, iccp=None, ): - assert (color != Colorspace.RGBA and color != Colorspace.LA) or ( - imgformat == ImageFormat.PNG and smaskdata is not None + assert ( + color not in [Colorspace.RGBA, Colorspace.LA] + or (imgformat == ImageFormat.PNG and smaskdata is not None) + or imgformat == ImageFormat.JPEG2000 ) if self.engine == Engine.pikepdf: @@ -852,7 +854,13 @@ class pdfdoc(object): if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA: colorspace = PdfName.DeviceGray elif color == Colorspace.RGB or color == Colorspace.RGBA: - colorspace = PdfName.DeviceRGB + if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000: + # there is no DeviceRGBA and for JPXDecode it is okay to have + # no colorspace as the pdf reader is supposed to get this info + # from the jpeg2000 payload itself + colorspace = None + else: + colorspace = PdfName.DeviceRGB elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: colorspace = PdfName.DeviceCMYK elif color == Colorspace.P: @@ -923,7 +931,8 @@ class pdfdoc(object): image[PdfName.Filter] = ofilter image[PdfName.Width] = imgwidthpx image[PdfName.Height] = imgheightpx - image[PdfName.ColorSpace] = colorspace + if colorspace is not None: + image[PdfName.ColorSpace] = colorspace image[PdfName.BitsPerComponent] = depth smask = None @@ -1292,7 +1301,7 @@ def get_imgmetadata( if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: # this codepath gets called if the PIL installation is not able to # handle JPEG2000 files - imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata) + imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = parsejp2(rawdata) if hdpi is None: hdpi = default_dpi @@ -1312,7 +1321,7 @@ def get_imgmetadata( ics = imgdata.mode # GIF and PNG files with transparency are supported - if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and ( + if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and ( ics in ["RGBA", "LA"] or "transparency" in imgdata.info ): # Must check the IHDR chunk for the bit depth, because PIL would lossily @@ -1828,10 +1837,13 @@ def read_images( raise JpegColorspaceError("jpeg can't be monochrome") if color == Colorspace["P"]: raise JpegColorspaceError("jpeg can't have a color palette") - if color == Colorspace["RGBA"]: + if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000: raise JpegColorspaceError("jpeg can't have an alpha channel") logger.debug("read_images() embeds a JPEG") cleanup() + depth = 8 + if imgformat == ImageFormat.JPEG2000: + _, _, _, _, _, _, depth = parsejp2(rawdata) return [ ( color, @@ -1843,7 +1855,7 @@ def read_images( imgheightpx, [], False, - 8, + depth, rotation, iccp, ) diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index 64caed4..e1d2b66 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -361,6 +361,8 @@ def compare(im1, im2, exact, icc, cmyk): + [ "-metric", "AE", + "-alpha", + "off", im1, im2, "null:", @@ -1216,6 +1218,74 @@ def jpg_2000_img(tmp_path_factory, tmp_normal_png): in_img.unlink() +@pytest.fixture(scope="session") +def jpg_2000_rgba8_img(tmp_path_factory, tmp_alpha_png): + in_img = tmp_path_factory.mktemp("jpg_2000_rgba8") / "in.jp2" + subprocess.check_call(CONVERT + [str(tmp_alpha_png), "-depth", "8", str(in_img)]) + identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"])) + assert len(identify) == 1 + # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was + # put into an array, here we cater for the older version containing just + # the bare dictionary + if "image" in identify: + identify = [identify] + assert "image" in identify[0] + assert identify[0]["image"].get("format") == "JP2", str(identify) + assert identify[0]["image"].get("mimeType") == "image/jp2", str(identify) + assert identify[0]["image"].get("geometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("colorspace") == "sRGB", str(identify) + assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify) + assert identify[0]["image"].get("depth") == 8, str(identify) + assert identify[0]["image"].get("pageGeometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("compression") == "JPEG2000", str(identify) + yield in_img + in_img.unlink() + + +@pytest.fixture(scope="session") +def jpg_2000_rgba16_img(tmp_path_factory, tmp_alpha_png): + in_img = tmp_path_factory.mktemp("jpg_2000_rgba16") / "in.jp2" + subprocess.check_call(CONVERT + [str(tmp_alpha_png), str(in_img)]) + identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"])) + assert len(identify) == 1 + # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was + # put into an array, here we cater for the older version containing just + # the bare dictionary + if "image" in identify: + identify = [identify] + assert "image" in identify[0] + assert identify[0]["image"].get("format") == "JP2", str(identify) + assert identify[0]["image"].get("mimeType") == "image/jp2", str(identify) + assert identify[0]["image"].get("geometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("colorspace") == "sRGB", str(identify) + assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify) + assert identify[0]["image"].get("depth") == 16, str(identify) + assert identify[0]["image"].get("pageGeometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("compression") == "JPEG2000", str(identify) + yield in_img + in_img.unlink() + + @pytest.fixture(scope="session") def png_rgb8_img(tmp_normal_png): in_img = tmp_normal_png @@ -4068,6 +4138,60 @@ def jpg_2000_pdf(tmp_path_factory, jpg_2000_img, request): out_pdf.unlink() +@pytest.fixture(scope="session", params=["internal", "pikepdf"]) +def jpg_2000_rgba8_pdf(tmp_path_factory, jpg_2000_rgba8_img, request): + out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba8_pdf") / "out.pdf" + subprocess.check_call( + [ + img2pdfprog, + "--producer=", + "--nodate", + "--engine=" + request.param, + "--output=" + str(out_pdf), + jpg_2000_rgba8_img, + ] + ) + with pikepdf.open(str(out_pdf)) as p: + assert ( + p.pages[0].Contents.read_bytes() + == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ" + ) + assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8 + assert not hasattr(p.pages[0].Resources.XObject.Im0, "ColorSpace") + assert p.pages[0].Resources.XObject.Im0.Filter == "/JPXDecode" + assert p.pages[0].Resources.XObject.Im0.Height == 60 + assert p.pages[0].Resources.XObject.Im0.Width == 60 + yield out_pdf + out_pdf.unlink() + + +@pytest.fixture(scope="session", params=["internal", "pikepdf"]) +def jpg_2000_rgba16_pdf(tmp_path_factory, jpg_2000_rgba16_img, request): + out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba16_pdf") / "out.pdf" + subprocess.check_call( + [ + img2pdfprog, + "--producer=", + "--nodate", + "--engine=" + request.param, + "--output=" + str(out_pdf), + jpg_2000_rgba16_img, + ] + ) + with pikepdf.open(str(out_pdf)) as p: + assert ( + p.pages[0].Contents.read_bytes() + == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ" + ) + assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 16 + assert not hasattr(p.pages[0].Resources.XObject.Im0, "ColorSpace") + assert p.pages[0].Resources.XObject.Im0.Filter == "/JPXDecode" + assert p.pages[0].Resources.XObject.Im0.Height == 60 + assert p.pages[0].Resources.XObject.Im0.Width == 60 + yield out_pdf + out_pdf.unlink() + + @pytest.fixture(scope="session", params=["internal", "pikepdf"]) def png_rgb8_pdf(tmp_path_factory, png_rgb8_img, request): out_pdf = tmp_path_factory.mktemp("png_rgb8_pdf") / "out.pdf" @@ -5461,6 +5585,39 @@ def test_jpg_2000(tmp_path_factory, jpg_2000_img, jpg_2000_pdf): compare_pdfimages_jp2(tmpdir, jpg_2000_img, jpg_2000_pdf) +@pytest.mark.skipif( + sys.platform in ["win32"], + reason="test utilities not available on Windows and MacOS", +) +@pytest.mark.skipif( + not HAVE_JP2, reason="requires imagemagick with support for jpeg2000" +) +def test_jpg_2000_rgba8(tmp_path_factory, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf): + tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba8") + compare_ghostscript(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf) + compare_poppler(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf) + # compare_mupdf(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf) + compare_pdfimages_jp2(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf) + + +@pytest.mark.skipif( + sys.platform in ["win32"], + reason="test utilities not available on Windows and MacOS", +) +@pytest.mark.skipif( + not HAVE_JP2, reason="requires imagemagick with support for jpeg2000" +) +def test_jpg_2000_rgba16(tmp_path_factory, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf): + tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba16") + compare_ghostscript( + tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf, gsdevice="tiff48nc" + ) + # poppler outputs 8-bit RGB so the comparison will not be exact + # compare_poppler(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf, exact=False) + # compare_mupdf(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf) + compare_pdfimages_jp2(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf) + + @pytest.mark.skipif( sys.platform in ["win32"], reason="test utilities not available on Windows and MacOS", diff --git a/src/jp2.py b/src/jp2.py index ae54746..d305d38 100644 --- a/src/jp2.py +++ b/src/jp2.py @@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes): def parse_ihdr(data): - height = struct.unpack(">I", data[0:4])[0] - width = struct.unpack(">I", data[4:8])[0] - return width, height + height, width, channels, bpp = struct.unpack(">IIHB", data[:11]) + return width, height, channels, bpp+1 def parse_colr(data): @@ -85,13 +84,13 @@ def parse_jp2h(data): while byteStart < noBytes and boxLengthValue != 0: boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes) if boxType == b"ihdr": - width, height = parse_ihdr(boxContents) + width, height, channels, bpp = parse_ihdr(boxContents) elif boxType == b"colr": colorspace = parse_colr(boxContents) elif boxType == b"res ": hdpi, vdpi = parse_res(boxContents) byteStart = byteEnd - return (width, height, colorspace, hdpi, vdpi) + return (width, height, colorspace, hdpi, vdpi, channels, bpp) def parsejp2(data): @@ -102,7 +101,7 @@ def parsejp2(data): while byteStart < noBytes and boxLengthValue != 0: boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes) if boxType == b"jp2h": - width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents) + width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(boxContents) break byteStart = byteEnd if not width: @@ -112,7 +111,7 @@ def parsejp2(data): if not colorspace: raise Exception("no colorspace in jp2 header") # retrieving the dpi is optional so we do not error out if not present - return (width, height, colorspace, hdpi, vdpi) + return (width, height, colorspace, hdpi, vdpi, channels, bpp) if __name__ == "__main__":