1
0
Fork 0
forked from josch/img2pdf

support for MIFF which allows 16 bit CMYK images

closes: #144
This commit is contained in:
Johannes Schauer Marin Rodrigues 2022-06-26 16:48:10 +01:00 committed by Johannes Schauer Marin Rodrigues
parent d9b90499f3
commit bad6fcae39
2 changed files with 269 additions and 4 deletions

View file

@ -45,6 +45,7 @@ import struct
import platform import platform
import hashlib import hashlib
from itertools import chain from itertools import chain
import re
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -125,7 +126,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other") Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other") ImageFormat = Enum(
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
)
PageMode = Enum("PageMode", "none outlines thumbs") PageMode = Enum("PageMode", "none outlines thumbs")
@ -1533,6 +1536,166 @@ def parse_png(rawdata):
return pngidat, palette return pngidat, palette
# Pattern for a single "key=value" field of a MIFF header. It is written in
# verbose mode, so the whitespace and the "#" remarks inside the pattern text
# are ignored by the regex engine. A value is a run of characters that are
# neither braces nor control characters/space, and/or brace-delimited groups
# (inside braces any character except the closing brace is accepted).
_miff_field_pattern = r"""
[^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
[^=]+ # the field name can even contain spaces
= # field name and value are separated by an equal sign
(?:
[^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
|{[^}]*} # or any kind of char surrounded by braces
)+"""
miff_re = re.compile(_miff_field_pattern, flags=re.VERBOSE)
# https://imagemagick.org/script/miff.php
def parse_miff(data):
results = []
header, rest = data.split(b":\x1a", 1)
header = header.decode("ISO-8859-1")
assert header.lower().startswith("id=imagemagick")
hdata = {}
for i, line in enumerate(re.findall(miff_re, header)):
if not line:
continue
k, v = line.split("=", 1)
if i == 0:
assert k.lower() == "id"
assert v.lower() == "imagemagick"
match k.lower():
case "class":
match v:
case "DirectClass" | "PseudoClass":
hdata["class"] = v
case _:
print("cannot understand class", v)
case "colorspace":
# theoretically RGBA and CMYKA should be supported as well
# please teach me how to create such a MIFF file
match v:
case "sRGB" | "CMYK" | "Gray":
hdata["colorspace"] = v
case _:
print("cannot understand colorspace", v)
case "depth":
match v:
case "8" | "16" | "32":
hdata["depth"] = int(v)
case _:
print("cannot understand depth", v)
case "colors":
hdata["colors"] = int(v)
case "matte":
match v:
case "True":
hdata["matte"] = True
case "False":
hdata["matte"] = False
case _:
print("cannot understand matte", v)
case "columns" | "rows":
hdata[k.lower()] = int(v)
case "compression":
print("compression not yet supported")
case "profile":
assert v in ["icc", "exif"]
hdata["profile"] = v
case "resolution":
dpix, dpiy = v.split("x", 1)
hdata["resolution"] = (float(dpix), float(dpiy))
assert "depth" in hdata
assert "columns" in hdata
assert "rows" in hdata
match hdata["class"]:
case "DirectClass":
if "colors" in hdata:
assert hdata["colors"] == 0
match hdata["colorspace"]:
case "sRGB":
numchannels = 3
colorspace = Colorspace.RGB
case "CMYK":
numchannels = 4
colorspace = Colorspace.CMYK
case "Gray":
numchannels = 1
colorspace = Colorspace.L
if hdata["matte"]:
numchannels += 1
if hdata.get("profile"):
# there is no key encoding the length of icc or exif data
# according to the docs, the profile-icc key is supposed to do this
print("FAIL: exif")
else:
lenimgdata = (
hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
)
assert len(rest) >= lenimgdata, (
len(rest),
hdata["depth"],
numchannels,
hdata["columns"],
hdata["rows"],
lenimgdata,
)
results.append(
(
colorspace,
hdata.get("resolution") or (default_dpi, default_dpi),
ImageFormat.MIFF,
zlib.compress(rest[:lenimgdata]),
None, # smask
hdata["columns"],
hdata["rows"],
[], # palette
False, # inverted
hdata["depth"],
0, # rotation
None, # icc profile
)
)
if len(rest) > lenimgdata:
# another image is here
assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
results.extend(parse_miff(rest[lenimgdata:]))
case "PseudoClass":
assert "colors" in hdata
if hdata["matte"]:
numchannels = 2
else:
numchannels = 1
lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
results.append(
(
Colorspace.RGB,
hdata.get("resolution") or (default_dpi, default_dpi),
ImageFormat.MIFF,
zlib.compress(rest[lenpal : lenpal + lenimgdata]),
None, # FIXME: allow alpha channel smask
hdata["columns"],
hdata["rows"],
rest[:lenpal], # palette
False, # inverted
hdata["depth"],
0, # rotation
None, # icc profile
)
)
if len(rest) > lenpal + lenimgdata:
# another image is here
assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
len(rest),
lenpal,
lenimgdata,
)
results.extend(parse_miff(rest[lenpal + lenimgdata :]))
return results
def read_images(rawdata, colorspace, first_frame_only=False, rot=None): def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
im = BytesIO(rawdata) im = BytesIO(rawdata)
im.seek(0) im.seek(0)
@ -1541,13 +1704,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgdata = Image.open(im) imgdata = Image.open(im)
except IOError as e: except IOError as e:
# test if it is a jpeg2000 image # test if it is a jpeg2000 image
if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
# image is jpeg2000
imgformat = ImageFormat.JPEG2000
if rawdata[:14].lower() == b"id=imagemagick":
# image is in MIFF format
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
# we need PIL but PIL cannot do 16 bit
imgformat = ImageFormat.MIFF
else:
raise ImageOpenError( raise ImageOpenError(
"cannot read input image (not jpeg2000). " "cannot read input image (not jpeg2000). "
"PIL: error reading image: %s" % e "PIL: error reading image: %s" % e
) )
# image is jpeg2000
imgformat = ImageFormat.JPEG2000
else: else:
logger.debug("PIL format = %s", imgdata.format) logger.debug("PIL format = %s", imgdata.format)
imgformat = None imgformat = None
@ -1710,6 +1879,10 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
) )
] ]
if imgformat == ImageFormat.MIFF:
return parse_miff(rawdata)
# If our input is not JPEG or PNG, then we might have a format that # If our input is not JPEG or PNG, then we might have a format that
# supports multiple frames (like TIFF or GIF), so we need a loop to # supports multiple frames (like TIFF or GIF), so we need a loop to
# iterate through all frames of the image. # iterate through all frames of the image.
@ -2344,6 +2517,10 @@ def convert(*images, **kwargs):
rawdata = f.read() rawdata = f.read()
f.close() f.close()
#md5 = hashlib.md5(rawdata).hexdigest()
#with open("./testdata/" + md5, "wb") as f:
# f.write(rawdata)
for ( for (
color, color,
ndpi, ndpi,

View file

@ -3875,6 +3875,51 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
yield in_img yield in_img
in_img.unlink() in_img.unlink()
@pytest.fixture(scope="session")
def miff_cmyk16_img(tmp_path_factory, tmp_normal_png):
    """Yield the path of a 16 bit CMYK MIFF file created from tmp_normal_png.

    The file is produced by running CONVERT with "-depth 16 -colorspace cmyk"
    and its properties are verified through CONVERT's "json:" output before it
    is handed to the tests. The file is removed again on teardown.
    """
    miff_path = tmp_path_factory.mktemp("miff_cmyk16") / "in.miff"
    convert_args = [
        str(tmp_normal_png),
        "-depth",
        "16",
        "-colorspace",
        "cmyk",
        str(miff_path),
    ]
    subprocess.check_call(CONVERT + convert_args)

    raw_report = subprocess.check_output(CONVERT + [str(miff_path), "json:"])
    identify = json.loads(raw_report)
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    entry = identify[0]
    assert "image" in entry
    image = entry["image"]

    expected_geometry = {"width": 60, "height": 60, "x": 0, "y": 0}
    assert image.get("format") == "MIFF", str(identify)
    assert image.get("geometry") == expected_geometry, str(identify)
    assert image.get("colorspace") == "CMYK", str(identify)
    assert image.get("type") == "ColorSeparation", str(identify)

    # the key name depends on the version string reported in the json output
    if entry.get("version", "0") < "1.0":
        endian = "endianess"
    else:
        endian = "endianness"
    # FIXME: should be LSB
    assert image.get(endian) in ["Undefined", "LSB"], str(identify)

    assert image.get("depth") == 16, str(identify)
    assert image.get("baseDepth") == 16, str(identify)
    assert image.get("pageGeometry") == expected_geometry, str(identify)

    yield miff_path
    miff_path.unlink()
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def png_icc_img(tmp_icc_png): def png_icc_img(tmp_icc_png):
@ -5261,6 +5306,35 @@ def tiff_ccitt_nometa2_pdf(tmp_path_factory, tiff_ccitt_nometa2_img, request):
out_pdf.unlink() out_pdf.unlink()
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def miff_cmyk16_pdf(tmp_path_factory, miff_cmyk16_img, request):
    """Yield the path of a PDF converted from the 16 bit CMYK MIFF fixture.

    The conversion runs img2pdfprog once per engine listed in the fixture
    params. Before yielding, the page content stream and the properties of
    the embedded image are checked with pikepdf. The PDF is removed again on
    teardown.
    """
    pdf_path = tmp_path_factory.mktemp("miff_cmyk16_pdf") / "out.pdf"
    command = [
        img2pdfprog,
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(pdf_path),
        str(miff_cmyk16_img),
    ]
    subprocess.check_call(command)

    with pikepdf.open(str(pdf_path)) as pdf:
        first_page = pdf.pages[0]
        expected_stream = b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        assert first_page.Contents.read_bytes() == expected_stream
        embedded = first_page.Resources.XObject.Im0
        assert embedded.BitsPerComponent == 16
        assert embedded.ColorSpace == "/DeviceCMYK"
        assert embedded.Filter == "/FlateDecode"
        assert embedded.Height == 60
        assert embedded.Width == 60

    yield pdf_path
    pdf_path.unlink()
############################################################################### ###############################################################################
# TEST CASES # # TEST CASES #
############################################################################### ###############################################################################
@ -6123,6 +6197,20 @@ def test_tiff_ccitt_nometa2(
compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf) compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf)
@pytest.mark.skipif(
    sys.platform in ["win32"],
    reason="test utilities not available on Windows and MacOS",
)
def test_miff_cmyk16(tmp_path_factory, miff_cmyk16_img, tiff_cmyk16_img, miff_cmyk16_pdf):
    """Check the PDF made from the 16 bit CMYK MIFF against the CMYK TIFF.

    The tiff_cmyk16_img fixture serves as the reference image for both
    compare_ghostscript and compare_mupdf, each called with exact=False.
    """
    workdir = tmp_path_factory.mktemp("miff_cmyk16")
    reference = tiff_cmyk16_img
    candidate = miff_cmyk16_pdf
    compare_ghostscript(
        workdir, reference, candidate, gsdevice="tiff32nc", exact=False
    )
    # not testing with poppler as it cannot write CMYK images
    compare_mupdf(workdir, reference, candidate, exact=False, cmyk=True)
# we define some variables so that the table below can be narrower # we define some variables so that the table below can be narrower
psl = (972, 504) # --pagesize landscape psl = (972, 504) # --pagesize landscape
psp = (504, 972) # --pagesize portrait psp = (504, 972) # --pagesize portrait