From f0b57985eee77c2572ae94821d684df7659146c3 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 7 Aug 2020 00:13:53 +0200 Subject: [PATCH] first stab at embedding ICC profiles --- src/img2pdf.py | 41 +++++++++-- src/img2pdf_test.py | 168 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 199 insertions(+), 10 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index be61bd8..690e531 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -752,6 +752,7 @@ class pdfdoc(object): bleedborder=None, trimborder=None, artborder=None, + iccp=None, ): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array @@ -804,6 +805,22 @@ class pdfdoc(object): else: raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) + if iccp is not None: + if self.engine == Engine.pikepdf: + iccpdict = self.writer.make_stream(iccp) + else: + iccpdict = PdfDict(stream=convert_load(iccp)) + iccpdict[PdfName.Alternate] = colorspace + if color == Colorspace["1"] or color == Colorspace.L: + iccpdict[PdfName.N] = 1 + elif color == Colorspace.RGB: + iccpdict[PdfName.N] = 3 + elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: + iccpdict[PdfName.N] = 4 + elif color == Colorspace.P: + raise Exception("Cannot have Palette images with ICC profile") + colorspace = [PdfName.ICCBased, iccpdict] + # either embed the whole jpeg or deflate the bitmap representation if imgformat is ImageFormat.JPEG: ofilter = PdfName.DCTDecode @@ -930,6 +947,8 @@ class pdfdoc(object): if self.engine == Engine.internal: self.writer.addobj(content) self.writer.addobj(image) + if iccp is not None: + self.writer.addobj(iccpdict) def tostring(self): stream = BytesIO() @@ -1240,9 +1259,13 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): color = Colorspace["CMYK;I"] logging.debug("input colorspace = %s", color.name) + iccp = None + if "icc_profile" in imgdata.info: + iccp = imgdata.info.get("icc_profile") + logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) - return (color, ndpi, imgwidthpx, imgheightpx, rotation) + return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp) def ccitt_payload_location_from_pil(img): @@ -1348,7 +1371,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): # JPEG and JPEG2000 can be embedded into the PDF as-is if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: - color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace, rawdata ) if color == Colorspace["1"]: @@ -1371,6 +1394,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): False, 8, rotation, + iccp, ) ] @@ -1382,7 +1406,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): # IHDR chunk. We know where to find that in the file because the IHDR chunk # must be the first chunk. if imgformat == ImageFormat.PNG and rawdata[28] == 0: - color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace, rawdata ) pngidat, palette = parse_png(rawdata) @@ -1407,6 +1431,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): False, depth, rotation, + iccp, ) ] @@ -1463,7 +1488,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): "unsupported photometric interpretation for " "group4 tiff: %d" % photo ) - color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace, rawdata ) offset, length = ccitt_payload_location_from_pil(imgdata) @@ -1498,6 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): inverted, 1, rotation, + iccp, ) ) img_page_count += 1 @@ -1505,7 +1531,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): logging.debug("Converting frame: %d" % img_page_count) - color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace ) @@ -1526,6 +1552,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): False, 1, rotation, + iccp, ) ) img_page_count += 1 @@ -1563,6 +1590,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): False, 8, rotation, + iccp, ) ) else: @@ -1593,6 +1621,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): False, depth, rotation, + iccp, ) ) img_page_count += 1 @@ -1999,6 +2028,7 @@ def convert(*images, **kwargs): inverted, depth, rotation, + iccp, ) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]): pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"]( imgwidthpx, imgheightpx, ndpi @@ -2044,6 +2074,7 @@ def convert(*images, **kwargs): kwargs["bleedborder"], kwargs["trimborder"], kwargs["artborder"], + iccp, ) if kwargs["outputstream"]: diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index de3d32e..8877d25 100644 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -18,6 +18,7 @@ import decimal from packaging.version import parse as parse_version import warnings import json +import pathlib HAVE_MUTOOL = True try: @@ -207,7 +208,7 @@ def compress(data): return result -def write_png(data, path, bitdepth, colortype, palette=None): +def write_png(data, path, bitdepth, colortype, palette=None, iccp=None): with open(str(path), "wb") as f: f.write(b"\x89PNG\r\n\x1A\n") # PNG image type Colour type Allowed bit depths @@ -231,6 +232,18 @@ def write_png(data, path, bitdepth, colortype, palette=None): + block + struct.pack(">I", zlib.crc32(block)) ) + if iccp is not None: + with open(iccp, "rb") as infh: + iccdata = infh.read() + block = b"iCCP" + block += b"icc\0" # arbitrary profile name + block += b"\0" # compression method (deflate) + block += zlib.compress(iccdata) + f.write( + struct.pack(">I", len(block) - 4) + + block + + struct.pack(">I", zlib.crc32(block)) + ) if palette is not None: block = b"PLTE" for col in palette: @@ -271,7 +284,7 @@ def write_png(data, path, bitdepth, colortype, palette=None): f.write(struct.pack(">I", 0) + block + struct.pack(">I", zlib.crc32(block))) -def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True): +def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True, icc=False): if gsdevice in ["png16m", "pnggray"]: ext = "png" elif gsdevice in ["tiff24nc", "tiff32nc", "tiff48nc"]: @@ -291,9 +304,34 @@ def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True): ] ) if exact: - subprocess.check_call( - ["compare", "-metric", "AE", str(img), str(tmpdir / "gs-1.") + ext, "null:"] - ) + if icc: + subprocess.check_call( + [ + "compare", + "-metric", + "AE", + "(", + "-profile", + "/usr/share/color/icc/ghostscript/srgb.icc", + "-depth", + "8", + str(img), + ")", + str(tmpdir / "gs-1.") + ext, + "null:", + ] + ) + else: + subprocess.check_call( + [ + "compare", + "-metric", + "AE", + str(img), + str(tmpdir / "gs-1.") + ext, + "null:", + ] + ) else: psnr = subprocess.run( [ @@ -620,6 +658,25 @@ def tmp_inverse_png(tmp_path_factory, alpha): tmp_inverse_png.unlink() +@pytest.fixture(scope="session") +def tmp_icc_png(tmp_path_factory, alpha): + normal16 = alpha[:, :, 0:3] + tmp_icc_png = tmp_path_factory.mktemp("icc_png") / "icc.png" + write_png( + 0xFF - normal16 / 0xFFFF * 0xFF, + str(tmp_icc_png), + 8, + 2, + iccp="/usr/share/color/icc/sRGB.icc", + ) + assert ( + hashlib.md5(tmp_icc_png.read_bytes()).hexdigest() + == "d09865464626a87b4e7f398e1f914cca" + ) + yield tmp_icc_png + tmp_icc_png.unlink() + + @pytest.fixture(scope="session") def tmp_normal16_png(tmp_path_factory, alpha): normal16 = alpha[:, :, 0:3] @@ -3657,6 +3714,59 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png): in_img.unlink() +@pytest.fixture(scope="session") +def png_icc_img(tmp_icc_png): + in_img = tmp_icc_png + identify = json.loads(subprocess.check_output(["convert", str(in_img), "json:"])) + assert len(identify) == 1 + # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was + # put into an array, here we cater for the older version containing just + # the bare dictionary + if "image" in identify: + identify = [identify] + assert "image" in identify[0] + assert identify[0]["image"].get("format") == "PNG", str(identify) + assert ( + identify[0]["image"].get("formatDescription") == "Portable Network Graphics" + ), str(identify) + assert identify[0]["image"].get("mimeType") == "image/png", str(identify) + assert identify[0]["image"].get("geometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("colorspace") == "sRGB", str(identify) + assert identify[0]["image"].get("type") == "TrueColor", str(identify) + assert identify[0]["image"].get("depth") == 8, str(identify) + assert identify[0]["image"].get("pageGeometry") == { + "width": 60, + "height": 60, + "x": 0, + "y": 0, + }, str(identify) + assert identify[0]["image"].get("compression") == "Zip", str(identify) + assert ( + identify[0]["image"].get("properties", {}).get("png:IHDR.bit-depth-orig") == "8" + ), str(identify) + assert ( + identify[0]["image"].get("properties", {}).get("png:IHDR.bit_depth") == "8" + ), str(identify) + assert ( + identify[0]["image"].get("properties", {}).get("png:IHDR.color-type-orig") + == "2" + ), str(identify) + assert ( + identify[0]["image"].get("properties", {}).get("png:IHDR.color_type") + == "2 (Truecolor)" + ), str(identify) + assert ( + identify[0]["image"]["properties"]["png:IHDR.interlace_method"] + == "0 (Not interlaced)" + ), str(identify) + return in_img + + ############################################################################### # OUTPUT FIXTURES # ############################################################################### @@ -4138,6 +4248,42 @@ def png_palette8_pdf(tmp_path_factory, tmp_palette8_png, request): out_pdf.unlink() +@pytest.fixture(scope="session", params=["internal", "pikepdf", "pdfrw"]) +def png_icc_pdf(tmp_path_factory, tmp_icc_png, request): + out_pdf = tmp_path_factory.mktemp("png_icc_pdf") / "out.pdf" + subprocess.check_call( + [ + "src/img2pdf.py", + "--producer=", + "--nodate", + "--engine=" + request.param, + "--output=" + str(out_pdf), + str(tmp_icc_png), + ] + ) + with pikepdf.open(str(out_pdf)) as p: + assert ( + p.pages[0].Contents.read_bytes() + == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ" + ) + assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8 + assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/ICCBased" + assert p.pages[0].Resources.XObject.Im0.ColorSpace[1].N == 3 + assert p.pages[0].Resources.XObject.Im0.ColorSpace[1].Alternate == "/DeviceRGB" + assert ( + p.pages[0].Resources.XObject.Im0.ColorSpace[1].read_bytes() + == pathlib.Path("/usr/share/color/icc/sRGB.icc").read_bytes() + ) + assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8 + assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3 + assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15 + assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode" + assert p.pages[0].Resources.XObject.Im0.Height == 60 + assert p.pages[0].Resources.XObject.Im0.Width == 60 + yield out_pdf + out_pdf.unlink() + + @pytest.fixture(scope="session", params=["internal", "pikepdf"]) def gif_palette1_pdf(tmp_path_factory, gif_palette1_img, request): out_pdf = tmp_path_factory.mktemp("gif_palette1_pdf") / "out.pdf" @@ -5166,6 +5312,18 @@ def test_png_palette8(tmp_path_factory, png_palette8_img, png_palette8_pdf): # pdfimages cannot export palette based images +@pytest.mark.skipif( + sys.platform in ["darwin", "win32"], + reason="test utilities not available on Windows and MacOS", +) +def test_png_icc(tmp_path_factory, png_icc_img, png_icc_pdf): + tmpdir = tmp_path_factory.mktemp("png_icc") + compare_ghostscript(tmpdir, png_icc_img, png_icc_pdf, icc=True) + # compare_poppler(tmpdir, png_icc_img, png_icc_pdf) + # compare_mupdf(tmpdir, png_icc_img, png_icc_pdf) + # compare_pdfimages_png(tmpdir, png_icc_img, png_icc_pdf) + + @pytest.mark.skipif( sys.platform in ["darwin", "win32"], reason="test utilities not available on Windows and MacOS",