first stab at embedding ICC profiles

2020-08-07 00:13:53 +02:00 · 2020-08-07 00:13:53 +02:00 · f0b57985ee
commit f0b57985ee
parent 1ba02bf838
2 changed files with 199 additions and 10 deletions
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -752,6 +752,7 @@ class pdfdoc(object):
        bleedborder=None,
        trimborder=None,
        artborder=None,
+        iccp=None,
    ):
        if self.engine == Engine.pikepdf:
            PdfArray = pikepdf.Array
@ -804,6 +805,22 @@ class pdfdoc(object):
        else:
            raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)

+        if iccp is not None:
+            if self.engine == Engine.pikepdf:
+                iccpdict = self.writer.make_stream(iccp)
+            else:
+                iccpdict = PdfDict(stream=convert_load(iccp))
+            iccpdict[PdfName.Alternate] = colorspace
+            if color == Colorspace["1"] or color == Colorspace.L:
+                iccpdict[PdfName.N] = 1
+            elif color == Colorspace.RGB:
+                iccpdict[PdfName.N] = 3
+            elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
+                iccpdict[PdfName.N] = 4
+            elif color == Colorspace.P:
+                raise Exception("Cannot have Palette images with ICC profile")
+            colorspace = [PdfName.ICCBased, iccpdict]
+
        # either embed the whole jpeg or deflate the bitmap representation
        if imgformat is ImageFormat.JPEG:
            ofilter = PdfName.DCTDecode
@ -930,6 +947,8 @@ class pdfdoc(object):
            if self.engine == Engine.internal:
                self.writer.addobj(content)
                self.writer.addobj(image)
+                if iccp is not None:
+                    self.writer.addobj(iccpdict)

    def tostring(self):
        stream = BytesIO()
@ -1240,9 +1259,13 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
                color = Colorspace["CMYK;I"]
        logging.debug("input colorspace = %s", color.name)

+    iccp = None
+    if "icc_profile" in imgdata.info:
+        iccp = imgdata.info.get("icc_profile")
+
    logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)

-    return (color, ndpi, imgwidthpx, imgheightpx, rotation)
+    return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp)


 def ccitt_payload_location_from_pil(img):
@ -1348,7 +1371,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):

    # JPEG and JPEG2000 can be embedded into the PDF as-is
    if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
-        color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+        color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
            imgdata, imgformat, default_dpi, colorspace, rawdata
        )
        if color == Colorspace["1"]:
@ -1371,6 +1394,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                False,
                8,
                rotation,
+                iccp,
            )
        ]

@ -1382,7 +1406,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
    # IHDR chunk. We know where to find that in the file because the IHDR chunk
    # must be the first chunk.
    if imgformat == ImageFormat.PNG and rawdata[28] == 0:
-        color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+        color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
            imgdata, imgformat, default_dpi, colorspace, rawdata
        )
        pngidat, palette = parse_png(rawdata)
@ -1407,6 +1431,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                False,
                depth,
                rotation,
+                iccp,
            )
        ]

@ -1463,7 +1488,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                    "unsupported photometric interpretation for "
                    "group4 tiff: %d" % photo
                )
-            color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+            color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
                imgdata, imgformat, default_dpi, colorspace, rawdata
            )
            offset, length = ccitt_payload_location_from_pil(imgdata)
@ -1498,6 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                    inverted,
                    1,
                    rotation,
+                    iccp,
                )
            )
            img_page_count += 1
@ -1505,7 +1531,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):

        logging.debug("Converting frame: %d" % img_page_count)

-        color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+        color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
            imgdata, imgformat, default_dpi, colorspace
        )

@ -1526,6 +1552,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                        False,
                        1,
                        rotation,
+                        iccp,
                    )
                )
                img_page_count += 1
@ -1563,6 +1590,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                    False,
                    8,
                    rotation,
+                    iccp,
                )
            )
        else:
@ -1593,6 +1621,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
                    False,
                    depth,
                    rotation,
+                    iccp,
                )
            )
        img_page_count += 1
@ -1999,6 +2028,7 @@ def convert(*images, **kwargs):
            inverted,
            depth,
            rotation,
+            iccp,
        ) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]):
            pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
                imgwidthpx, imgheightpx, ndpi
@ -2044,6 +2074,7 @@ def convert(*images, **kwargs):
                kwargs["bleedborder"],
                kwargs["trimborder"],
                kwargs["artborder"],
+                iccp,
            )

    if kwargs["outputstream"]:
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
@ -18,6 +18,7 @@ import decimal
 from packaging.version import parse as parse_version
 import warnings
 import json
+import pathlib

 HAVE_MUTOOL = True
 try:
@ -207,7 +208,7 @@ def compress(data):
    return result


-def write_png(data, path, bitdepth, colortype, palette=None):
+def write_png(data, path, bitdepth, colortype, palette=None, iccp=None):
    with open(str(path), "wb") as f:
        f.write(b"\x89PNG\r\n\x1A\n")
        # PNG image type        Colour type Allowed bit depths
@ -231,6 +232,18 @@ def write_png(data, path, bitdepth, colortype, palette=None):
            + block
            + struct.pack(">I", zlib.crc32(block))
        )
+        if iccp is not None:
+            with open(iccp, "rb") as infh:
+                iccdata = infh.read()
+            block = b"iCCP"
+            block += b"icc\0"  # arbitrary profile name
+            block += b"\0"  # compression method (deflate)
+            block += zlib.compress(iccdata)
+            f.write(
+                struct.pack(">I", len(block) - 4)
+                + block
+                + struct.pack(">I", zlib.crc32(block))
+            )
        if palette is not None:
            block = b"PLTE"
            for col in palette:
@ -271,7 +284,7 @@ def write_png(data, path, bitdepth, colortype, palette=None):
        f.write(struct.pack(">I", 0) + block + struct.pack(">I", zlib.crc32(block)))


-def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
+def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True, icc=False):
    if gsdevice in ["png16m", "pnggray"]:
        ext = "png"
    elif gsdevice in ["tiff24nc", "tiff32nc", "tiff48nc"]:
@ -291,9 +304,34 @@ def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
        ]
    )
    if exact:
-        subprocess.check_call(
-            ["compare", "-metric", "AE", str(img), str(tmpdir / "gs-1.") + ext, "null:"]
-        )
+        if icc:
+            subprocess.check_call(
+                [
+                    "compare",
+                    "-metric",
+                    "AE",
+                    "(",
+                    "-profile",
+                    "/usr/share/color/icc/ghostscript/srgb.icc",
+                    "-depth",
+                    "8",
+                    str(img),
+                    ")",
+                    str(tmpdir / "gs-1.") + ext,
+                    "null:",
+                ]
+            )
+        else:
+            subprocess.check_call(
+                [
+                    "compare",
+                    "-metric",
+                    "AE",
+                    str(img),
+                    str(tmpdir / "gs-1.") + ext,
+                    "null:",
+                ]
+            )
    else:
        psnr = subprocess.run(
            [
@ -620,6 +658,25 @@ def tmp_inverse_png(tmp_path_factory, alpha):
    tmp_inverse_png.unlink()


+@pytest.fixture(scope="session")
+def tmp_icc_png(tmp_path_factory, alpha):
+    normal16 = alpha[:, :, 0:3]  # keep only the first three channels
+    tmp_icc_png = tmp_path_factory.mktemp("icc_png") / "icc.png"
+    write_png(
+        0xFF - normal16 / 0xFFFF * 0xFF,  # rescale 0xFFFF -> 0xFF, inverted
+        str(tmp_icc_png),
+        8,  # bitdepth
+        2,  # colortype (reported as "2 (Truecolor)" by identify)
+        iccp="/usr/share/color/icc/sRGB.icc",  # NOTE(review): system path
+    )
+    assert (
+        hashlib.md5(tmp_icc_png.read_bytes()).hexdigest()
+        == "d09865464626a87b4e7f398e1f914cca"  # also pins the ICC file's bytes
+    )
+    yield tmp_icc_png
+    tmp_icc_png.unlink()  # teardown: remove the generated PNG
+
+
@pytest.fixture(scope="session")
 def tmp_normal16_png(tmp_path_factory, alpha):
    normal16 = alpha[:, :, 0:3]
@ -3657,6 +3714,59 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
    in_img.unlink()


+@pytest.fixture(scope="session")
+def png_icc_img(tmp_icc_png):
+    in_img = tmp_icc_png
+    identify = json.loads(subprocess.check_output(["convert", str(in_img), "json:"]))
+    assert len(identify) == 1  # NOTE(review): for a bare dict this counts keys
+    # Somewhere between ImageMagick 6.9.7.4 and 6.9.9.34 the JSON output was
+    # wrapped in an array; older versions emit the bare dictionary, so wrap it
+    # in a list here and index both forms the same way below.
+    if "image" in identify:
+        identify = [identify]
+    assert "image" in identify[0]
+    assert identify[0]["image"].get("format") == "PNG", str(identify)
+    assert (
+        identify[0]["image"].get("formatDescription") == "Portable Network Graphics"
+    ), str(identify)
+    assert identify[0]["image"].get("mimeType") == "image/png", str(identify)
+    assert identify[0]["image"].get("geometry") == {
+        "width": 60,
+        "height": 60,
+        "x": 0,
+        "y": 0,
+    }, str(identify)
+    assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
+    assert identify[0]["image"].get("type") == "TrueColor", str(identify)
+    assert identify[0]["image"].get("depth") == 8, str(identify)
+    assert identify[0]["image"].get("pageGeometry") == {
+        "width": 60,
+        "height": 60,
+        "x": 0,
+        "y": 0,
+    }, str(identify)
+    assert identify[0]["image"].get("compression") == "Zip", str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("png:IHDR.bit-depth-orig") == "8"
+    ), str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("png:IHDR.bit_depth") == "8"
+    ), str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("png:IHDR.color-type-orig")
+        == "2"
+    ), str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("png:IHDR.color_type")
+        == "2 (Truecolor)"
+    ), str(identify)
+    assert (
+        identify[0]["image"]["properties"]["png:IHDR.interlace_method"]
+        == "0 (Not interlaced)"
+    ), str(identify)
+    return in_img  # same path as the input fixture, now validated
+
+
 ###############################################################################
 #                                OUTPUT FIXTURES                              #
 ###############################################################################
@ -4138,6 +4248,42 @@ def png_palette8_pdf(tmp_path_factory, tmp_palette8_png, request):
    out_pdf.unlink()


+@pytest.fixture(scope="session", params=["internal", "pikepdf", "pdfrw"])
+def png_icc_pdf(tmp_path_factory, tmp_icc_png, request):
+    out_pdf = tmp_path_factory.mktemp("png_icc_pdf") / "out.pdf"
+    subprocess.check_call(
+        [
+            "src/img2pdf.py",  # relative path: resolved against the cwd
+            "--producer=",
+            "--nodate",
+            "--engine=" + request.param,  # one run per entry in params
+            "--output=" + str(out_pdf),
+            str(tmp_icc_png),
+        ]
+    )
+    with pikepdf.open(str(out_pdf)) as p:
+        assert (
+            p.pages[0].Contents.read_bytes()
+            == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+        )
+        assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
+        assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/ICCBased"
+        assert p.pages[0].Resources.XObject.Im0.ColorSpace[1].N == 3  # RGB
+        assert p.pages[0].Resources.XObject.Im0.ColorSpace[1].Alternate == "/DeviceRGB"
+        assert (
+            p.pages[0].Resources.XObject.Im0.ColorSpace[1].read_bytes()
+            == pathlib.Path("/usr/share/color/icc/sRGB.icc").read_bytes()  # verbatim
+        )
+        assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8
+        assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3
+        assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15
+        assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
+        assert p.pages[0].Resources.XObject.Im0.Height == 60
+        assert p.pages[0].Resources.XObject.Im0.Width == 60
+    yield out_pdf
+    out_pdf.unlink()  # teardown: remove the generated PDF
+
+
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
 def gif_palette1_pdf(tmp_path_factory, gif_palette1_img, request):
    out_pdf = tmp_path_factory.mktemp("gif_palette1_pdf") / "out.pdf"
@ -5166,6 +5312,18 @@ def test_png_palette8(tmp_path_factory, png_palette8_img, png_palette8_pdf):
    # pdfimages cannot export palette based images


+@pytest.mark.skipif(
+    sys.platform in ["darwin", "win32"],
+    reason="test utilities not available on Windows and MacOS",
+)
+def test_png_icc(tmp_path_factory, png_icc_img, png_icc_pdf):
+    tmpdir = tmp_path_factory.mktemp("png_icc")
+    compare_ghostscript(tmpdir, png_icc_img, png_icc_pdf, icc=True)
+    # NOTE(review): only the ghostscript comparison runs for the ICC case.
+    # compare_poppler, compare_mupdf and compare_pdfimages_png, each called as
+    # (tmpdir, png_icc_img, png_icc_pdf), are disabled; reason not recorded.
+
+
@pytest.mark.skipif(
    sys.platform in ["darwin", "win32"],
    reason="test utilities not available on Windows and MacOS",