first stab at embedding ICC profiles

This commit is contained in:
Johannes 'josch' Schauer 2020-08-07 00:13:53 +02:00
parent 1ba02bf838
commit f0b57985ee
Signed by untrusted user: josch
GPG key ID: F2CBA5C78FBD83E1
2 changed files with 199 additions and 10 deletions

View file

@ -752,6 +752,7 @@ class pdfdoc(object):
bleedborder=None,
trimborder=None,
artborder=None,
iccp=None,
):
if self.engine == Engine.pikepdf:
PdfArray = pikepdf.Array
@ -804,6 +805,22 @@ class pdfdoc(object):
else:
raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
if iccp is not None:
if self.engine == Engine.pikepdf:
iccpdict = self.writer.make_stream(iccp)
else:
iccpdict = PdfDict(stream=convert_load(iccp))
iccpdict[PdfName.Alternate] = colorspace
if color == Colorspace["1"] or color == Colorspace.L:
iccpdict[PdfName.N] = 1
elif color == Colorspace.RGB:
iccpdict[PdfName.N] = 3
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
iccpdict[PdfName.N] = 4
elif color == Colorspace.P:
raise Exception("Cannot have Palette images with ICC profile")
colorspace = [PdfName.ICCBased, iccpdict]
# either embed the whole jpeg or deflate the bitmap representation
if imgformat is ImageFormat.JPEG:
ofilter = PdfName.DCTDecode
@ -930,6 +947,8 @@ class pdfdoc(object):
if self.engine == Engine.internal:
self.writer.addobj(content)
self.writer.addobj(image)
if iccp is not None:
self.writer.addobj(iccpdict)
def tostring(self):
stream = BytesIO()
@ -1240,9 +1259,13 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
color = Colorspace["CMYK;I"]
logging.debug("input colorspace = %s", color.name)
iccp = None
if "icc_profile" in imgdata.info:
iccp = imgdata.info.get("icc_profile")
logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
return (color, ndpi, imgwidthpx, imgheightpx, rotation)
return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp)
def ccitt_payload_location_from_pil(img):
@ -1348,7 +1371,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
# JPEG and JPEG2000 can be embedded into the PDF as-is
if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata
)
if color == Colorspace["1"]:
@ -1371,6 +1394,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
False,
8,
rotation,
iccp,
)
]
@ -1382,7 +1406,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
# IHDR chunk. We know where to find that in the file because the IHDR chunk
# must be the first chunk.
if imgformat == ImageFormat.PNG and rawdata[28] == 0:
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata
)
pngidat, palette = parse_png(rawdata)
@ -1407,6 +1431,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
False,
depth,
rotation,
iccp,
)
]
@ -1463,7 +1488,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
"unsupported photometric interpretation for "
"group4 tiff: %d" % photo
)
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata
)
offset, length = ccitt_payload_location_from_pil(imgdata)
@ -1498,6 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
inverted,
1,
rotation,
iccp,
)
)
img_page_count += 1
@ -1505,7 +1531,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
logging.debug("Converting frame: %d" % img_page_count)
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace
)
@ -1526,6 +1552,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
False,
1,
rotation,
iccp,
)
)
img_page_count += 1
@ -1563,6 +1590,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
False,
8,
rotation,
iccp,
)
)
else:
@ -1593,6 +1621,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
False,
depth,
rotation,
iccp,
)
)
img_page_count += 1
@ -1999,6 +2028,7 @@ def convert(*images, **kwargs):
inverted,
depth,
rotation,
iccp,
) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]):
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
imgwidthpx, imgheightpx, ndpi
@ -2044,6 +2074,7 @@ def convert(*images, **kwargs):
kwargs["bleedborder"],
kwargs["trimborder"],
kwargs["artborder"],
iccp,
)
if kwargs["outputstream"]:

View file

@ -18,6 +18,7 @@ import decimal
from packaging.version import parse as parse_version
import warnings
import json
import pathlib
HAVE_MUTOOL = True
try:
@ -207,7 +208,7 @@ def compress(data):
return result
def write_png(data, path, bitdepth, colortype, palette=None):
def write_png(data, path, bitdepth, colortype, palette=None, iccp=None):
with open(str(path), "wb") as f:
f.write(b"\x89PNG\r\n\x1A\n")
# PNG image type Colour type Allowed bit depths
@ -231,6 +232,18 @@ def write_png(data, path, bitdepth, colortype, palette=None):
+ block
+ struct.pack(">I", zlib.crc32(block))
)
if iccp is not None:
with open(iccp, "rb") as infh:
iccdata = infh.read()
block = b"iCCP"
block += b"icc\0" # arbitrary profile name
block += b"\0" # compression method (deflate)
block += zlib.compress(iccdata)
f.write(
struct.pack(">I", len(block) - 4)
+ block
+ struct.pack(">I", zlib.crc32(block))
)
if palette is not None:
block = b"PLTE"
for col in palette:
@ -271,7 +284,7 @@ def write_png(data, path, bitdepth, colortype, palette=None):
f.write(struct.pack(">I", 0) + block + struct.pack(">I", zlib.crc32(block)))
def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True, icc=False):
if gsdevice in ["png16m", "pnggray"]:
ext = "png"
elif gsdevice in ["tiff24nc", "tiff32nc", "tiff48nc"]:
@ -291,9 +304,34 @@ def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
]
)
if exact:
subprocess.check_call(
["compare", "-metric", "AE", str(img), str(tmpdir / "gs-1.") + ext, "null:"]
)
if icc:
subprocess.check_call(
[
"compare",
"-metric",
"AE",
"(",
"-profile",
"/usr/share/color/icc/ghostscript/srgb.icc",
"-depth",
"8",
str(img),
")",
str(tmpdir / "gs-1.") + ext,
"null:",
]
)
else:
subprocess.check_call(
[
"compare",
"-metric",
"AE",
str(img),
str(tmpdir / "gs-1.") + ext,
"null:",
]
)
else:
psnr = subprocess.run(
[
@ -620,6 +658,25 @@ def tmp_inverse_png(tmp_path_factory, alpha):
tmp_inverse_png.unlink()
@pytest.fixture(scope="session")
def tmp_icc_png(tmp_path_factory, alpha):
    """Yield the path of an 8-bit RGB PNG that carries an ICC profile.

    The pixel data is taken from the first three channels of the ``alpha``
    fixture and written with ``write_png`` (bit depth 8, colour type 2),
    passing ``/usr/share/color/icc/sRGB.icc`` as the profile to embed.
    The md5 of the written file is checked against a fixed digest so that
    any change in the generated bytes is noticed. The file is deleted once
    the session is done with it.
    """
    rgb = alpha[:, :, 0:3]
    png_path = tmp_path_factory.mktemp("icc_png") / "icc.png"
    # presumably `alpha` holds 16-bit samples: rescale to 0..0xFF, then
    # invert -- TODO confirm against the `alpha` fixture
    pixels = 0xFF - rgb / 0xFFFF * 0xFF
    write_png(pixels, str(png_path), 8, 2, iccp="/usr/share/color/icc/sRGB.icc")
    digest = hashlib.md5(png_path.read_bytes()).hexdigest()
    assert digest == "d09865464626a87b4e7f398e1f914cca"
    yield png_path
    png_path.unlink()
@pytest.fixture(scope="session")
def tmp_normal16_png(tmp_path_factory, alpha):
normal16 = alpha[:, :, 0:3]
@ -3657,6 +3714,59 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
in_img.unlink()
@pytest.fixture(scope="session")
def png_icc_img(tmp_icc_png):
    """Check the ICC test PNG with ImageMagick and return its path.

    Runs ``convert <png> json:`` and asserts that the reported format,
    geometry, colorspace, depth, compression and PNG IHDR properties match
    what is expected for the 60x60 8-bit truecolor image. Returns the
    unchanged input path so tests can depend on a verified image.
    """
    in_img = tmp_icc_png
    raw_json = subprocess.check_output(["convert", str(in_img), "json:"])
    identify = json.loads(raw_json)
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    assert "image" in identify[0]
    image = identify[0]["image"]

    full_frame = {"width": 60, "height": 60, "x": 0, "y": 0}
    # checked in this order; each entry is (key in "image", expected value)
    expected_fields = [
        ("format", "PNG"),
        ("formatDescription", "Portable Network Graphics"),
        ("mimeType", "image/png"),
        ("geometry", full_frame),
        ("colorspace", "sRGB"),
        ("type", "TrueColor"),
        ("depth", 8),
        ("pageGeometry", full_frame),
        ("compression", "Zip"),
    ]
    for key, wanted in expected_fields:
        assert image.get(key) == wanted, str(identify)

    # (key in image["properties"], expected value); a missing "properties"
    # dictionary is tolerated here and simply fails the comparison
    expected_properties = [
        ("png:IHDR.bit-depth-orig", "8"),
        ("png:IHDR.bit_depth", "8"),
        ("png:IHDR.color-type-orig", "2"),
        ("png:IHDR.color_type", "2 (Truecolor)"),
    ]
    for key, wanted in expected_properties:
        assert image.get("properties", {}).get(key) == wanted, str(identify)

    # this lookup indexes directly, so a missing key raises KeyError instead
    # of failing the assertion
    assert (
        image["properties"]["png:IHDR.interlace_method"] == "0 (Not interlaced)"
    ), str(identify)
    return in_img
###############################################################################
# OUTPUT FIXTURES #
###############################################################################
@ -4138,6 +4248,42 @@ def png_palette8_pdf(tmp_path_factory, tmp_palette8_png, request):
out_pdf.unlink()
@pytest.fixture(scope="session", params=["internal", "pikepdf", "pdfrw"])
def png_icc_pdf(tmp_path_factory, tmp_icc_png, request):
    """Convert the ICC test PNG to PDF with each engine and yield the PDF path.

    ``src/img2pdf.py`` is run once per engine given in ``params``. The
    result is opened with pikepdf to assert that the page content stream,
    the image XObject and its ICCBased colour space (N, Alternate and the
    embedded profile bytes) are as expected. The PDF is removed afterwards.
    """
    out_pdf = tmp_path_factory.mktemp("png_icc_pdf") / "out.pdf"
    command = [
        "src/img2pdf.py",
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(out_pdf),
        str(tmp_icc_png),
    ]
    subprocess.check_call(command)
    with pikepdf.open(str(out_pdf)) as pdf:
        page = pdf.pages[0]
        assert (
            page.Contents.read_bytes()
            == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        )

        image = page.Resources.XObject.Im0
        assert image.BitsPerComponent == 8

        # the colour space is a two element array: /ICCBased plus the
        # profile stream
        colorspace = image.ColorSpace
        assert colorspace[0] == "/ICCBased"
        profile = colorspace[1]
        assert profile.N == 3
        assert profile.Alternate == "/DeviceRGB"
        # the embedded stream must equal the profile file byte for byte
        assert (
            profile.read_bytes()
            == pathlib.Path("/usr/share/color/icc/sRGB.icc").read_bytes()
        )

        decode_parms = image.DecodeParms
        assert decode_parms.BitsPerComponent == 8
        assert decode_parms.Colors == 3
        assert decode_parms.Predictor == 15
        assert image.Filter == "/FlateDecode"
        assert image.Height == 60
        assert image.Width == 60
    yield out_pdf
    out_pdf.unlink()
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def gif_palette1_pdf(tmp_path_factory, gif_palette1_img, request):
out_pdf = tmp_path_factory.mktemp("gif_palette1_pdf") / "out.pdf"
@ -5166,6 +5312,18 @@ def test_png_palette8(tmp_path_factory, png_palette8_img, png_palette8_pdf):
# pdfimages cannot export palette based images
@pytest.mark.skipif(
    sys.platform in ("darwin", "win32"),
    reason="test utilities not available on Windows and MacOS",
)
def test_png_icc(tmp_path_factory, png_icc_img, png_icc_pdf):
    """Compare the ghostscript rendering of the ICC PDF with the input PNG.

    Only the ghostscript comparison is run, with ``icc=True``; the poppler,
    mupdf and pdfimages comparisons are not enabled for this test.
    """
    workdir = tmp_path_factory.mktemp("png_icc")
    compare_ghostscript(workdir, png_icc_img, png_icc_pdf, icc=True)
    # compare_poppler(workdir, png_icc_img, png_icc_pdf)
    # compare_mupdf(workdir, png_icc_img, png_icc_pdf)
    # compare_pdfimages_png(workdir, png_icc_img, png_icc_pdf)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"],
reason="test utilities not available on Windows and MacOS",