forked from josch/img2pdf
parent
d9b90499f3
commit
bad6fcae39
2 changed files with 269 additions and 4 deletions
185
src/img2pdf.py
185
src/img2pdf.py
|
@ -45,6 +45,7 @@ import struct
|
|||
import platform
|
||||
import hashlib
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -125,7 +126,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
|||
|
||||
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
||||
|
||||
ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
|
||||
ImageFormat = Enum(
|
||||
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
||||
)
|
||||
|
||||
PageMode = Enum("PageMode", "none outlines thumbs")
|
||||
|
||||
|
@ -1533,6 +1536,166 @@ def parse_png(rawdata):
|
|||
return pngidat, palette
|
||||
|
||||
|
||||
|
||||
# Matches one "key=value" field of a MIFF header.
miff_re = re.compile(
    r"""
    [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
    [^=]+                 # the field name can even contain spaces
    =                     # field name and value are separated by an equal sign
    (?:
        [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
        |{[^}]*}                # or any kind of char surrounded by braces
    )+""",
    re.VERBOSE,
)


def _miff_int(key, value):
    """Convert the header value of field *key* to int or raise ImageOpenError."""
    try:
        return int(value)
    except ValueError as e:
        raise ImageOpenError(
            "MIFF: invalid value for field %s: %s" % (key, value)
        ) from e


def _parse_miff_header(data, offset):
    """Parse the MIFF header that starts at data[offset].

    Returns a tuple (hdata, binary_offset) where hdata is a dict with the
    understood header fields and binary_offset is the position of the first
    byte following the header terminator.
    """
    if data[offset : offset + 14].lower() != b"id=imagemagick":
        raise ImageOpenError("MIFF: header does not start with id=ImageMagick")
    # the header is terminated by a colon followed by a ctrl-Z character
    end = data.find(b":\x1a", offset)
    if end == -1:
        raise ImageOpenError("MIFF: cannot find the end of the header")
    header = data[offset:end].decode("ISO-8859-1")
    fields = miff_re.findall(header)
    # the very first field must be exactly id=ImageMagick (in any case)
    if not fields or fields[0].lower() != "id=imagemagick":
        raise ImageOpenError("MIFF: first header field is not id=ImageMagick")

    hdata = {}
    for field in fields:
        key, value = field.split("=", 1)
        key = key.lower()
        if key == "class":
            if value not in ("DirectClass", "PseudoClass"):
                raise ImageOpenError("MIFF: cannot understand class %s" % value)
            hdata["class"] = value
        elif key == "colorspace":
            # theoretically RGBA and CMYKA should be supported as well
            # please teach me how to create such a MIFF file
            if value in ("sRGB", "CMYK", "Gray"):
                hdata["colorspace"] = value
            else:
                # only fatal for DirectClass images, so the decision is
                # deferred until the pixel data is interpreted
                logger.warning("MIFF: cannot understand colorspace %s", value)
        elif key == "depth":
            if value not in ("8", "16", "32"):
                raise ImageOpenError("MIFF: cannot understand depth %s" % value)
            hdata["depth"] = int(value)
        elif key == "matte":
            if value not in ("True", "False"):
                raise ImageOpenError("MIFF: cannot understand matte %s" % value)
            hdata["matte"] = value == "True"
        elif key in ("colors", "columns", "rows"):
            hdata[key] = _miff_int(key, value)
        elif key == "compression":
            # compressed pixel data must not be interpreted as raw samples
            if value.lower() not in ("none", "undefined"):
                raise ImageOpenError(
                    "MIFF: compression not yet supported: %s" % value
                )
        elif key == "profile":
            # there is no key encoding the length of icc or exif data
            # according to the docs, the profile-icc key is supposed to do this
            # without the length, the start of the pixel data is unknown
            raise ImageOpenError(
                "MIFF: embedded profiles are not supported: %s" % value
            )
        elif key == "resolution":
            try:
                dpix, dpiy = value.split("x", 1)
                hdata["resolution"] = (float(dpix), float(dpiy))
            except ValueError as e:
                raise ImageOpenError(
                    "MIFF: cannot understand resolution %s" % value
                ) from e
    return hdata, end + 2


def _read_miff_frame(data, offset, hdata):
    """Build the result tuple of one frame whose binary data starts at offset.

    Returns a tuple (frame, end) where end is the offset of the first byte
    after the binary data of this frame.
    """
    for key in ("depth", "columns", "rows"):
        if key not in hdata:
            raise ImageOpenError("MIFF: header lacks the %s field" % key)
    depth = hdata["depth"]
    columns = hdata["columns"]
    rows = hdata["rows"]
    # a header without a matte field is treated as having no alpha channel
    matte = hdata.get("matte", False)

    # the MIFF documentation describes DirectClass as the default class
    pseudo = hdata.get("class", "DirectClass") == "PseudoClass"
    if pseudo:
        if "colors" not in hdata:
            raise ImageOpenError("MIFF: PseudoClass image lacks the colors field")
        numchannels = 2 if matte else 1
        colorspace = Colorspace.RGB
        lenpal = 3 * hdata["colors"] * depth // 8
        # NOTE(review): every index (and alpha sample) is counted as a single
        # byte here -- confirm for PseudoClass images with a depth above 8
        lenimgdata = numchannels * rows * columns
    else:
        if hdata.get("colors", 0) != 0:
            raise ImageOpenError("MIFF: DirectClass image with a colormap")
        channels = {
            "sRGB": (3, Colorspace.RGB),
            "CMYK": (4, Colorspace.CMYK),
            "Gray": (1, Colorspace.L),
        }
        if "colorspace" not in hdata:
            raise ImageOpenError("MIFF: missing or unsupported colorspace")
        numchannels, colorspace = channels[hdata["colorspace"]]
        if matte:
            # NOTE(review): the alpha samples stay interleaved in the pixel
            # data that is passed on while the colorspace does not announce
            # them -- alpha is not properly supported yet
            numchannels += 1
        lenpal = 0
        lenimgdata = depth // 8 * numchannels * columns * rows

    pixels = offset + lenpal
    end = pixels + lenimgdata
    if end > len(data):
        raise ImageOpenError(
            "MIFF: image data is truncated: need %d bytes but only %d remain"
            % (lenpal + lenimgdata, len(data) - offset)
        )
    frame = (
        colorspace,
        hdata.get("resolution") or (default_dpi, default_dpi),
        ImageFormat.MIFF,
        zlib.compress(data[pixels:end]),
        None,  # smask (FIXME: allow alpha channel smask)
        columns,
        rows,
        data[offset:pixels] if pseudo else [],  # palette
        False,  # inverted
        depth,
        0,  # rotation
        None,  # icc profile
    )
    return frame, end


# https://imagemagick.org/script/miff.php
def parse_miff(data):
    """Parse the frames of an uncompressed MIFF file.

    data is the complete content of the file as bytes. A MIFF file is a
    sequence of frames, each made up of a text header that is terminated by
    a colon and a ctrl-Z character, followed by the binary data of the frame.

    Returns a list with one tuple per frame:

        (colorspace, (dpix, dpiy), ImageFormat.MIFF, zlib compressed pixel
        data, smask (always None), columns, rows, palette, inverted (always
        False), depth, rotation (always 0), icc profile (always None))

    Raises ImageOpenError if the data is not a MIFF file, is truncated or
    uses a feature that is not supported (compression, embedded profiles,
    unknown class, depth or colorspace).
    """
    results = []
    offset = 0
    # iterate instead of recursing on slices of the remaining data so that
    # files with many frames are neither copied repeatedly nor limited by
    # the recursion depth
    while True:
        hdata, offset = _parse_miff_header(data, offset)
        frame, offset = _read_miff_frame(data, offset, hdata)
        results.append(frame)
        if offset >= len(data):
            return results
|
||||
|
||||
|
||||
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||
im = BytesIO(rawdata)
|
||||
im.seek(0)
|
||||
|
@ -1541,13 +1704,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
imgdata = Image.open(im)
|
||||
except IOError as e:
|
||||
# test if it is a jpeg2000 image
|
||||
if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
# image is jpeg2000
|
||||
imgformat = ImageFormat.JPEG2000
|
||||
if rawdata[:14].lower() == b"id=imagemagick":
|
||||
# image is in MIFF format
|
||||
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
||||
# we need PIL but PIL cannot do 16 bit
|
||||
imgformat = ImageFormat.MIFF
|
||||
else:
|
||||
raise ImageOpenError(
|
||||
"cannot read input image (not jpeg2000). "
|
||||
"PIL: error reading image: %s" % e
|
||||
)
|
||||
# image is jpeg2000
|
||||
imgformat = ImageFormat.JPEG2000
|
||||
else:
|
||||
logger.debug("PIL format = %s", imgdata.format)
|
||||
imgformat = None
|
||||
|
@ -1710,6 +1879,10 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
]
|
||||
|
||||
|
||||
if imgformat == ImageFormat.MIFF:
|
||||
return parse_miff(rawdata)
|
||||
|
||||
# If our input is not JPEG or PNG, then we might have a format that
|
||||
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
||||
# iterate through all frames of the image.
|
||||
|
@ -2344,6 +2517,10 @@ def convert(*images, **kwargs):
|
|||
rawdata = f.read()
|
||||
f.close()
|
||||
|
||||
#md5 = hashlib.md5(rawdata).hexdigest()
|
||||
#with open("./testdata/" + md5, "wb") as f:
|
||||
# f.write(rawdata)
|
||||
|
||||
for (
|
||||
color,
|
||||
ndpi,
|
||||
|
|
|
@ -3875,6 +3875,51 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
|
|||
yield in_img
|
||||
in_img.unlink()
|
||||
|
||||
@pytest.fixture(scope="session")
def miff_cmyk16_img(tmp_path_factory, tmp_normal_png):
    """Yield the path of a 16 bit CMYK MIFF image made from tmp_normal_png.

    The image is created with the CONVERT command and its properties are
    checked through the "json:" output of the same command before the path
    is handed out. The file is deleted again on teardown.
    """
    miff_path = tmp_path_factory.mktemp("miff_cmyk16") / "in.miff"
    convert_args = [
        str(tmp_normal_png),
        "-depth",
        "16",
        "-colorspace",
        "cmyk",
        str(miff_path),
    ]
    subprocess.check_call(CONVERT + convert_args)
    identify = json.loads(
        subprocess.check_output(CONVERT + [str(miff_path), "json:"])
    )
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    assert "image" in identify[0]
    props = identify[0]["image"]
    # both the geometry and the page geometry must cover the 60x60 image
    whole_image = {
        "width": 60,
        "height": 60,
        "x": 0,
        "y": 0,
    }
    assert props.get("format") == "MIFF", str(identify)
    assert props.get("geometry") == whole_image, str(identify)
    assert props.get("colorspace") == "CMYK", str(identify)
    assert props.get("type") == "ColorSeparation", str(identify)
    # the key is spelled differently depending on the reported version
    if identify[0].get("version", "0") < "1.0":
        endian = "endianess"
    else:
        endian = "endianness"
    # FIXME: should be LSB
    assert props.get(endian) in ["Undefined", "LSB"], str(identify)
    assert props.get("depth") == 16, str(identify)
    assert props.get("baseDepth") == 16, str(identify)
    assert props.get("pageGeometry") == whole_image, str(identify)
    yield miff_path
    miff_path.unlink()
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def png_icc_img(tmp_icc_png):
|
||||
|
@ -5261,6 +5306,35 @@ def tiff_ccitt_nometa2_pdf(tmp_path_factory, tiff_ccitt_nometa2_img, request):
|
|||
out_pdf.unlink()
|
||||
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def miff_cmyk16_pdf(tmp_path_factory, miff_cmyk16_img, request):
    """Yield the path of the PDF that img2pdf creates from miff_cmyk16_img.

    The fixture is parametrized over the engines "internal" and "pikepdf".
    The page content stream and the properties of the embedded image are
    checked with pikepdf before the path is handed out. The file is deleted
    again on teardown.
    """
    pdf_path = tmp_path_factory.mktemp("miff_cmyk16_pdf") / "out.pdf"
    command = [
        img2pdfprog,
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(pdf_path),
        str(miff_cmyk16_img),
    ]
    subprocess.check_call(command)
    with pikepdf.open(str(pdf_path)) as pdf:
        expected_content = b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        assert pdf.pages[0].Contents.read_bytes() == expected_content
        image = pdf.pages[0].Resources.XObject.Im0
        assert image.BitsPerComponent == 16
        assert image.ColorSpace == "/DeviceCMYK"
        assert image.Filter == "/FlateDecode"
        assert image.Height == 60
        assert image.Width == 60
    yield pdf_path
    pdf_path.unlink()
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
# TEST CASES #
|
||||
###############################################################################
|
||||
|
@ -6123,6 +6197,20 @@ def test_tiff_ccitt_nometa2(
|
|||
compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="test utilities not available on Windows and MacOS",
)
def test_miff_cmyk16(tmp_path_factory, miff_cmyk16_img, tiff_cmyk16_img, miff_cmyk16_pdf):
    """Compare the PDF created from the 16 bit CMYK MIFF with tiff_cmyk16_img.

    The comparison is done with ghostscript and mupdf, both with exact=False.
    """
    workdir = tmp_path_factory.mktemp("miff_cmyk16")
    compare_ghostscript(
        workdir,
        tiff_cmyk16_img,
        miff_cmyk16_pdf,
        gsdevice="tiff32nc",
        exact=False,
    )
    # not testing with poppler as it cannot write CMYK images
    compare_mupdf(
        workdir,
        tiff_cmyk16_img,
        miff_cmyk16_pdf,
        exact=False,
        cmyk=True,
    )
    # compare_pdfimages_tiff is deliberately not run for this image
|
||||
|
||||
|
||||
# we define some variables so that the table below can be narrower
|
||||
psl = (972, 504) # --pagesize landscape
|
||||
psp = (504, 972) # --pagesize portrait
|
||||
|
|
Loading…
Reference in a new issue