forked from josch/img2pdf
first stab at embedding ICC profiles
This commit is contained in:
parent
1ba02bf838
commit
f0b57985ee
2 changed files with 199 additions and 10 deletions
|
@ -752,6 +752,7 @@ class pdfdoc(object):
|
|||
bleedborder=None,
|
||||
trimborder=None,
|
||||
artborder=None,
|
||||
iccp=None,
|
||||
):
|
||||
if self.engine == Engine.pikepdf:
|
||||
PdfArray = pikepdf.Array
|
||||
|
@ -804,6 +805,22 @@ class pdfdoc(object):
|
|||
else:
|
||||
raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
|
||||
|
||||
if iccp is not None:
|
||||
if self.engine == Engine.pikepdf:
|
||||
iccpdict = self.writer.make_stream(iccp)
|
||||
else:
|
||||
iccpdict = PdfDict(stream=convert_load(iccp))
|
||||
iccpdict[PdfName.Alternate] = colorspace
|
||||
if color == Colorspace["1"] or color == Colorspace.L:
|
||||
iccpdict[PdfName.N] = 1
|
||||
elif color == Colorspace.RGB:
|
||||
iccpdict[PdfName.N] = 3
|
||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||
iccpdict[PdfName.N] = 4
|
||||
elif color == Colorspace.P:
|
||||
raise Exception("Cannot have Palette images with ICC profile")
|
||||
colorspace = [PdfName.ICCBased, iccpdict]
|
||||
|
||||
# either embed the whole jpeg or deflate the bitmap representation
|
||||
if imgformat is ImageFormat.JPEG:
|
||||
ofilter = PdfName.DCTDecode
|
||||
|
@ -930,6 +947,8 @@ class pdfdoc(object):
|
|||
if self.engine == Engine.internal:
|
||||
self.writer.addobj(content)
|
||||
self.writer.addobj(image)
|
||||
if iccp is not None:
|
||||
self.writer.addobj(iccpdict)
|
||||
|
||||
def tostring(self):
|
||||
stream = BytesIO()
|
||||
|
@ -1240,9 +1259,13 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
|
|||
color = Colorspace["CMYK;I"]
|
||||
logging.debug("input colorspace = %s", color.name)
|
||||
|
||||
iccp = None
|
||||
if "icc_profile" in imgdata.info:
|
||||
iccp = imgdata.info.get("icc_profile")
|
||||
|
||||
logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
|
||||
|
||||
return (color, ndpi, imgwidthpx, imgheightpx, rotation)
|
||||
return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp)
|
||||
|
||||
|
||||
def ccitt_payload_location_from_pil(img):
|
||||
|
@ -1348,7 +1371,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
|
||||
# JPEG and JPEG2000 can be embedded into the PDF as-is
|
||||
if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata
|
||||
)
|
||||
if color == Colorspace["1"]:
|
||||
|
@ -1371,6 +1394,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
False,
|
||||
8,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
]
|
||||
|
||||
|
@ -1382,7 +1406,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
# IHDR chunk. We know where to find that in the file because the IHDR chunk
|
||||
# must be the first chunk.
|
||||
if imgformat == ImageFormat.PNG and rawdata[28] == 0:
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata
|
||||
)
|
||||
pngidat, palette = parse_png(rawdata)
|
||||
|
@ -1407,6 +1431,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
False,
|
||||
depth,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
]
|
||||
|
||||
|
@ -1463,7 +1488,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
"unsupported photometric interpretation for "
|
||||
"group4 tiff: %d" % photo
|
||||
)
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata
|
||||
)
|
||||
offset, length = ccitt_payload_location_from_pil(imgdata)
|
||||
|
@ -1498,6 +1523,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
inverted,
|
||||
1,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
)
|
||||
img_page_count += 1
|
||||
|
@ -1505,7 +1531,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
|
||||
logging.debug("Converting frame: %d" % img_page_count)
|
||||
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace
|
||||
)
|
||||
|
||||
|
@ -1526,6 +1552,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
False,
|
||||
1,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
)
|
||||
img_page_count += 1
|
||||
|
@ -1563,6 +1590,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
False,
|
||||
8,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
)
|
||||
else:
|
||||
|
@ -1593,6 +1621,7 @@ def read_images(rawdata, colorspace, first_frame_only=False):
|
|||
False,
|
||||
depth,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
)
|
||||
img_page_count += 1
|
||||
|
@ -1999,6 +2028,7 @@ def convert(*images, **kwargs):
|
|||
inverted,
|
||||
depth,
|
||||
rotation,
|
||||
iccp,
|
||||
) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]):
|
||||
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
|
||||
imgwidthpx, imgheightpx, ndpi
|
||||
|
@ -2044,6 +2074,7 @@ def convert(*images, **kwargs):
|
|||
kwargs["bleedborder"],
|
||||
kwargs["trimborder"],
|
||||
kwargs["artborder"],
|
||||
iccp,
|
||||
)
|
||||
|
||||
if kwargs["outputstream"]:
|
||||
|
|
|
@ -18,6 +18,7 @@ import decimal
|
|||
from packaging.version import parse as parse_version
|
||||
import warnings
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
HAVE_MUTOOL = True
|
||||
try:
|
||||
|
@ -207,7 +208,7 @@ def compress(data):
|
|||
return result
|
||||
|
||||
|
||||
def write_png(data, path, bitdepth, colortype, palette=None):
|
||||
def write_png(data, path, bitdepth, colortype, palette=None, iccp=None):
|
||||
with open(str(path), "wb") as f:
|
||||
f.write(b"\x89PNG\r\n\x1A\n")
|
||||
# PNG image type Colour type Allowed bit depths
|
||||
|
@ -231,6 +232,18 @@ def write_png(data, path, bitdepth, colortype, palette=None):
|
|||
+ block
|
||||
+ struct.pack(">I", zlib.crc32(block))
|
||||
)
|
||||
if iccp is not None:
|
||||
with open(iccp, "rb") as infh:
|
||||
iccdata = infh.read()
|
||||
block = b"iCCP"
|
||||
block += b"icc\0" # arbitrary profile name
|
||||
block += b"\0" # compression method (deflate)
|
||||
block += zlib.compress(iccdata)
|
||||
f.write(
|
||||
struct.pack(">I", len(block) - 4)
|
||||
+ block
|
||||
+ struct.pack(">I", zlib.crc32(block))
|
||||
)
|
||||
if palette is not None:
|
||||
block = b"PLTE"
|
||||
for col in palette:
|
||||
|
@ -271,7 +284,7 @@ def write_png(data, path, bitdepth, colortype, palette=None):
|
|||
f.write(struct.pack(">I", 0) + block + struct.pack(">I", zlib.crc32(block)))
|
||||
|
||||
|
||||
def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
|
||||
def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True, icc=False):
|
||||
if gsdevice in ["png16m", "pnggray"]:
|
||||
ext = "png"
|
||||
elif gsdevice in ["tiff24nc", "tiff32nc", "tiff48nc"]:
|
||||
|
@ -291,9 +304,34 @@ def compare_ghostscript(tmpdir, img, pdf, gsdevice="png16m", exact=True):
|
|||
]
|
||||
)
|
||||
if exact:
|
||||
subprocess.check_call(
|
||||
["compare", "-metric", "AE", str(img), str(tmpdir / "gs-1.") + ext, "null:"]
|
||||
)
|
||||
if icc:
|
||||
subprocess.check_call(
|
||||
[
|
||||
"compare",
|
||||
"-metric",
|
||||
"AE",
|
||||
"(",
|
||||
"-profile",
|
||||
"/usr/share/color/icc/ghostscript/srgb.icc",
|
||||
"-depth",
|
||||
"8",
|
||||
str(img),
|
||||
")",
|
||||
str(tmpdir / "gs-1.") + ext,
|
||||
"null:",
|
||||
]
|
||||
)
|
||||
else:
|
||||
subprocess.check_call(
|
||||
[
|
||||
"compare",
|
||||
"-metric",
|
||||
"AE",
|
||||
str(img),
|
||||
str(tmpdir / "gs-1.") + ext,
|
||||
"null:",
|
||||
]
|
||||
)
|
||||
else:
|
||||
psnr = subprocess.run(
|
||||
[
|
||||
|
@ -620,6 +658,25 @@ def tmp_inverse_png(tmp_path_factory, alpha):
|
|||
tmp_inverse_png.unlink()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def tmp_icc_png(tmp_path_factory, alpha):
    """Yield the path of an 8-bit truecolor PNG carrying an ICC profile.

    The pixel data is built from the first three channels of ``alpha``
    (presumably 16-bit samples, given the division by 0xFFFF -- confirm
    against the ``alpha`` fixture), rescaled to the 0..0xFF range and
    inverted.  The profile path is handed to ``write_png`` via ``iccp``.
    The written file is pinned by its MD5 digest and is deleted again once
    the session no longer needs the fixture.
    """
    rgb = alpha[:, :, 0:3]
    png_path = tmp_path_factory.mktemp("icc_png") / "icc.png"
    inverted = 0xFF - rgb / 0xFFFF * 0xFF
    # bit depth 8, colour type 2
    write_png(
        inverted,
        str(png_path),
        8,
        2,
        iccp="/usr/share/color/icc/sRGB.icc",
    )
    digest = hashlib.md5(png_path.read_bytes()).hexdigest()
    assert digest == "d09865464626a87b4e7f398e1f914cca"
    yield png_path
    png_path.unlink()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def tmp_normal16_png(tmp_path_factory, alpha):
|
||||
normal16 = alpha[:, :, 0:3]
|
||||
|
@ -3657,6 +3714,59 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
|
|||
in_img.unlink()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def png_icc_img(tmp_icc_png):
    """Return ``tmp_icc_png`` after validating it with ImageMagick.

    ``convert <file> json:`` is run on the image and the reported format,
    geometry, colorspace, type, depth, compression and PNG IHDR properties
    are checked against the values expected for a 60x60 8-bit truecolor PNG.
    """
    in_img = tmp_icc_png
    raw_json = subprocess.check_output(["convert", str(in_img), "json:"])
    identify = json.loads(raw_json)
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    assert "image" in identify[0]

    image = identify[0]["image"]
    expected_geometry = {
        "width": 60,
        "height": 60,
        "x": 0,
        "y": 0,
    }

    assert image.get("format") == "PNG", str(identify)
    assert image.get("formatDescription") == "Portable Network Graphics", str(
        identify
    )
    assert image.get("mimeType") == "image/png", str(identify)
    assert image.get("geometry") == expected_geometry, str(identify)
    assert image.get("colorspace") == "sRGB", str(identify)
    assert image.get("type") == "TrueColor", str(identify)
    assert image.get("depth") == 8, str(identify)
    assert image.get("pageGeometry") == expected_geometry, str(identify)
    assert image.get("compression") == "Zip", str(identify)

    # A missing "properties" entry is tolerated for these lookups; they then
    # fail through the assertion rather than through a KeyError.
    props = image.get("properties", {})
    assert props.get("png:IHDR.bit-depth-orig") == "8", str(identify)
    assert props.get("png:IHDR.bit_depth") == "8", str(identify)
    assert props.get("png:IHDR.color-type-orig") == "2", str(identify)
    assert props.get("png:IHDR.color_type") == "2 (Truecolor)", str(identify)
    # The interlace check indexes directly, so a missing key raises KeyError.
    interlace = image["properties"]["png:IHDR.interlace_method"]
    assert interlace == "0 (Not interlaced)", str(identify)
    return in_img
|
||||
|
||||
|
||||
###############################################################################
|
||||
# OUTPUT FIXTURES #
|
||||
###############################################################################
|
||||
|
@ -4138,6 +4248,42 @@ def png_palette8_pdf(tmp_path_factory, tmp_palette8_png, request):
|
|||
out_pdf.unlink()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", params=["internal", "pikepdf", "pdfrw"])
def png_icc_pdf(tmp_path_factory, tmp_icc_png, request):
    """Convert ``tmp_icc_png`` to a PDF and yield the path of the result.

    The fixture is parametrized over the three engines; ``src/img2pdf.py``
    is run once per engine.  The output is then opened with pikepdf and the
    page content stream, the image XObject and its ICCBased colour space
    (component count, alternate colour space and the embedded profile
    bytes) are verified.  The PDF is removed again at teardown.
    """
    out_pdf = tmp_path_factory.mktemp("png_icc_pdf") / "out.pdf"
    command = [
        "src/img2pdf.py",
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(out_pdf),
        str(tmp_icc_png),
    ]
    subprocess.check_call(command)
    with pikepdf.open(str(out_pdf)) as p:
        page = p.pages[0]
        assert (
            page.Contents.read_bytes()
            == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        )
        im0 = page.Resources.XObject.Im0
        assert im0.BitsPerComponent == 8
        assert im0.ColorSpace[0] == "/ICCBased"
        # The second array element is the stream holding the ICC profile.
        icc_stream = im0.ColorSpace[1]
        assert icc_stream.N == 3
        assert icc_stream.Alternate == "/DeviceRGB"
        # The embedded profile must be the same bytes as the source profile.
        assert (
            icc_stream.read_bytes()
            == pathlib.Path("/usr/share/color/icc/sRGB.icc").read_bytes()
        )
        decode_parms = im0.DecodeParms
        assert decode_parms.BitsPerComponent == 8
        assert decode_parms.Colors == 3
        assert decode_parms.Predictor == 15
        assert im0.Filter == "/FlateDecode"
        assert im0.Height == 60
        assert im0.Width == 60
    yield out_pdf
    out_pdf.unlink()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
|
||||
def gif_palette1_pdf(tmp_path_factory, gif_palette1_img, request):
|
||||
out_pdf = tmp_path_factory.mktemp("gif_palette1_pdf") / "out.pdf"
|
||||
|
@ -5166,6 +5312,18 @@ def test_png_palette8(tmp_path_factory, png_palette8_img, png_palette8_pdf):
|
|||
# pdfimages cannot export palette based images
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform in ["darwin", "win32"],
    reason="test utilities not available on Windows and MacOS",
)
def test_png_icc(tmp_path_factory, png_icc_img, png_icc_pdf):
    """Compare the ICC-tagged PNG with the Ghostscript rendering of its PDF.

    ``icc=True`` selects the profile-aware comparison branch in
    ``compare_ghostscript``.
    """
    workdir = tmp_path_factory.mktemp("png_icc")
    compare_ghostscript(
        tmpdir=workdir,
        img=png_icc_img,
        pdf=png_icc_pdf,
        icc=True,
    )
    # Only the ghostscript comparison is enabled; the other comparisons are
    # left commented out:
    # compare_poppler(tmpdir, png_icc_img, png_icc_pdf)
    # compare_mupdf(tmpdir, png_icc_img, png_icc_pdf)
    # compare_pdfimages_png(tmpdir, png_icc_img, png_icc_pdf)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform in ["darwin", "win32"],
|
||||
reason="test utilities not available on Windows and MacOS",
|
||||
|
|
Loading…
Reference in a new issue