forked from josch/img2pdf
parent
d9b90499f3
commit
bad6fcae39
2 changed files with 269 additions and 4 deletions
185
src/img2pdf.py
185
src/img2pdf.py
|
@ -45,6 +45,7 @@ import struct
|
||||||
import platform
|
import platform
|
||||||
import hashlib
|
import hashlib
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
import re
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -125,7 +126,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
||||||
|
|
||||||
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
||||||
|
|
||||||
ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
|
ImageFormat = Enum(
|
||||||
|
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
||||||
|
)
|
||||||
|
|
||||||
PageMode = Enum("PageMode", "none outlines thumbs")
|
PageMode = Enum("PageMode", "none outlines thumbs")
|
||||||
|
|
||||||
|
@ -1533,6 +1536,166 @@ def parse_png(rawdata):
|
||||||
return pngidat, palette
|
return pngidat, palette
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Matches one "key=value" field of a MIFF header. The pattern has no capture
# groups, so re.findall() yields each whole "key=value" string, which
# parse_miff() then splits at the first "=". A value is a run of
# non-control, non-brace characters and/or brace-delimited groups ({...}),
# so brace-delimited values may contain spaces.
miff_re = re.compile(
    r"""
    [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
    [^=]+ # the field name can even contain spaces
    = # field name and value are separated by an equal sign
    (?:
    [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
    |{[^}]*} # or any kind of char surrounded by braces
    )+""",
    re.VERBOSE,
)
|
||||||
|
|
||||||
|
# https://imagemagick.org/script/miff.php
|
||||||
|
def _parse_miff_header(header):
    """Extract the header fields that parse_miff() needs from a MIFF header.

    ``header`` is the decoded text in front of the ``:\\x1a`` marker. The
    returned dict may contain the keys "class", "colorspace", "depth",
    "colors", "matte", "columns", "rows", "profile" and "resolution"; a key
    is only present if the header carried it with a value that is understood.
    Unknown header fields are ignored, unsupported values are logged.
    """
    hdata = {}
    for i, line in enumerate(re.findall(miff_re, header)):
        if not line:
            continue
        k, v = line.split("=", 1)
        key = k.lower()
        if i == 0:
            # the very first field has to be the magic id=ImageMagick
            assert key == "id"
            assert v.lower() == "imagemagick"
        # A plain if/elif chain is used instead of match/case: this is only a
        # constant switch and match/case would require Python >= 3.10.
        if key == "class":
            if v in ("DirectClass", "PseudoClass"):
                hdata["class"] = v
            else:
                logger.warning("cannot understand class %s", v)
        elif key == "colorspace":
            # theoretically RGBA and CMYKA should be supported as well
            # please teach me how to create such a MIFF file
            if v in ("sRGB", "CMYK", "Gray"):
                hdata["colorspace"] = v
            else:
                logger.warning("cannot understand colorspace %s", v)
        elif key == "depth":
            if v in ("8", "16", "32"):
                hdata["depth"] = int(v)
            else:
                logger.warning("cannot understand depth %s", v)
        elif key == "colors":
            hdata["colors"] = int(v)
        elif key == "matte":
            if v == "True":
                hdata["matte"] = True
            elif v == "False":
                hdata["matte"] = False
            else:
                logger.warning("cannot understand matte %s", v)
        elif key in ("columns", "rows"):
            hdata[key] = int(v)
        elif key == "compression":
            logger.warning("compression not yet supported")
        elif key == "profile":
            assert v in ["icc", "exif"]
            hdata["profile"] = v
        elif key == "resolution":
            dpix, dpiy = v.split("x", 1)
            hdata["resolution"] = (float(dpix), float(dpiy))
    return hdata


def parse_miff(data):
    """Parse one or more concatenated MIFF images from ``data`` (bytes).

    Format description: https://imagemagick.org/script/miff.php

    The text header in front of the first ``:\\x1a`` marker is parsed, the
    size of the pixel data is computed from depth, channel count, columns and
    rows, and exactly that many bytes are taken from what follows the marker.
    If more bytes remain they must start with ``id=ImageMagick`` and are
    parsed recursively as the next image.

    Returns a list with one 12-tuple per image:
    (colorspace, (dpi_x, dpi_y), ImageFormat.MIFF, zlib-compressed pixel
    data, smask (always None), columns, rows, palette, inverted (always
    False), depth, rotation (always 0), icc profile (always None)).

    Raises AssertionError if the header is not a MIFF header, mandatory
    fields are missing or the pixel data is shorter than the header
    announces. Raises KeyError if the "class" field, or for DirectClass
    images the "colorspace" field, is missing or has an unsupported value.
    Raises ValueError if the ``:\\x1a`` marker is missing.
    """
    results = []
    header, rest = data.split(b":\x1a", 1)
    header = header.decode("ISO-8859-1")
    assert header.lower().startswith("id=imagemagick")
    hdata = _parse_miff_header(header)

    assert "depth" in hdata
    assert "columns" in hdata
    assert "rows" in hdata
    dpi = hdata.get("resolution") or (default_dpi, default_dpi)
    # The matte field is optional: a header without it means "no alpha
    # channel" and must not abort parsing with a KeyError.
    has_matte = bool(hdata.get("matte"))

    imgclass = hdata["class"]
    if imgclass == "DirectClass":
        if "colors" in hdata:
            assert hdata["colors"] == 0
        cs = hdata["colorspace"]
        if cs == "sRGB":
            numchannels = 3
            colorspace = Colorspace.RGB
        elif cs == "CMYK":
            numchannels = 4
            colorspace = Colorspace.CMYK
        elif cs == "Gray":
            numchannels = 1
            colorspace = Colorspace.L
        if has_matte:
            numchannels += 1
        if hdata.get("profile"):
            # there is no key encoding the length of icc or exif data
            # according to the docs, the profile-icc key is supposed to do this
            logger.error("FAIL: exif")
        else:
            lenimgdata = (
                hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
            )
            assert len(rest) >= lenimgdata, (
                len(rest),
                hdata["depth"],
                numchannels,
                hdata["columns"],
                hdata["rows"],
                lenimgdata,
            )
            results.append(
                (
                    colorspace,
                    dpi,
                    ImageFormat.MIFF,
                    zlib.compress(rest[:lenimgdata]),
                    None,  # smask
                    hdata["columns"],
                    hdata["rows"],
                    [],  # palette
                    False,  # inverted
                    hdata["depth"],
                    0,  # rotation
                    None,  # icc profile
                )
            )
            if len(rest) > lenimgdata:
                # another image is here
                assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
                results.extend(parse_miff(rest[lenimgdata:]))
    elif imgclass == "PseudoClass":
        assert "colors" in hdata
        numchannels = 2 if has_matte else 1
        # the colormap (3 samples per color) precedes the pixel data
        lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
        lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
        assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
        results.append(
            (
                Colorspace.RGB,
                dpi,
                ImageFormat.MIFF,
                zlib.compress(rest[lenpal : lenpal + lenimgdata]),
                None,  # FIXME: allow alpha channel smask
                hdata["columns"],
                hdata["rows"],
                rest[:lenpal],  # palette
                False,  # inverted
                hdata["depth"],
                0,  # rotation
                None,  # icc profile
            )
        )
        if len(rest) > lenpal + lenimgdata:
            # another image is here
            assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
                len(rest),
                lenpal,
                lenimgdata,
            )
            results.extend(parse_miff(rest[lenpal + lenimgdata :]))
    return results
|
||||||
|
|
||||||
|
|
||||||
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
im = BytesIO(rawdata)
|
im = BytesIO(rawdata)
|
||||||
im.seek(0)
|
im.seek(0)
|
||||||
|
@ -1541,13 +1704,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
imgdata = Image.open(im)
|
imgdata = Image.open(im)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
# test if it is a jpeg2000 image
|
# test if it is a jpeg2000 image
|
||||||
if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||||
|
# image is jpeg2000
|
||||||
|
imgformat = ImageFormat.JPEG2000
|
||||||
|
if rawdata[:14].lower() == b"id=imagemagick":
|
||||||
|
# image is in MIFF format
|
||||||
|
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
||||||
|
# we need PIL but PIL cannot do 16 bit
|
||||||
|
imgformat = ImageFormat.MIFF
|
||||||
|
else:
|
||||||
raise ImageOpenError(
|
raise ImageOpenError(
|
||||||
"cannot read input image (not jpeg2000). "
|
"cannot read input image (not jpeg2000). "
|
||||||
"PIL: error reading image: %s" % e
|
"PIL: error reading image: %s" % e
|
||||||
)
|
)
|
||||||
# image is jpeg2000
|
|
||||||
imgformat = ImageFormat.JPEG2000
|
|
||||||
else:
|
else:
|
||||||
logger.debug("PIL format = %s", imgdata.format)
|
logger.debug("PIL format = %s", imgdata.format)
|
||||||
imgformat = None
|
imgformat = None
|
||||||
|
@ -1710,6 +1879,10 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
if imgformat == ImageFormat.MIFF:
|
||||||
|
return parse_miff(rawdata)
|
||||||
|
|
||||||
# If our input is not JPEG or PNG, then we might have a format that
|
# If our input is not JPEG or PNG, then we might have a format that
|
||||||
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
||||||
# iterate through all frames of the image.
|
# iterate through all frames of the image.
|
||||||
|
@ -2344,6 +2517,10 @@ def convert(*images, **kwargs):
|
||||||
rawdata = f.read()
|
rawdata = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
#md5 = hashlib.md5(rawdata).hexdigest()
|
||||||
|
#with open("./testdata/" + md5, "wb") as f:
|
||||||
|
# f.write(rawdata)
|
||||||
|
|
||||||
for (
|
for (
|
||||||
color,
|
color,
|
||||||
ndpi,
|
ndpi,
|
||||||
|
|
|
@ -3875,6 +3875,51 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
|
||||||
yield in_img
|
yield in_img
|
||||||
in_img.unlink()
|
in_img.unlink()
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def miff_cmyk16_img(tmp_path_factory, tmp_normal_png):
    """Yield the path of a 16 bit CMYK MIFF image created from tmp_normal_png.

    The image is produced with the CONVERT command and its properties are
    verified through CONVERT's "json:" output before it is handed to the
    tests. The file is removed again on teardown.
    """
    in_img = tmp_path_factory.mktemp("miff_cmyk16") / "in.miff"
    conversion_args = [
        str(tmp_normal_png),
        "-depth",
        "16",
        "-colorspace",
        "cmyk",
        str(in_img),
    ]
    subprocess.check_call(CONVERT + conversion_args)

    json_output = subprocess.check_output(CONVERT + [str(in_img), "json:"])
    identify = json.loads(json_output)
    assert len(identify) == 1
    # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
    # put into an array, here we cater for the older version containing just
    # the bare dictionary
    if "image" in identify:
        identify = [identify]
    entry = identify[0]
    assert "image" in entry
    image = entry["image"]

    # image geometry and page geometry are expected to be the same
    expected_geometry = {
        "width": 60,
        "height": 60,
        "x": 0,
        "y": 0,
    }
    assert image.get("format") == "MIFF", str(identify)
    assert image.get("geometry") == expected_geometry, str(identify)
    assert image.get("colorspace") == "CMYK", str(identify)
    assert image.get("type") == "ColorSeparation", str(identify)

    # the key is looked up as "endianess" for json versions below 1.0
    if entry.get("version", "0") < "1.0":
        endian = "endianess"
    else:
        endian = "endianness"
    # FIXME: should be LSB
    assert image.get(endian) in ["Undefined", "LSB"], str(identify)

    assert image.get("depth") == 16, str(identify)
    assert image.get("baseDepth") == 16, str(identify)
    assert image.get("pageGeometry") == expected_geometry, str(identify)

    yield in_img
    in_img.unlink()
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def png_icc_img(tmp_icc_png):
|
def png_icc_img(tmp_icc_png):
|
||||||
|
@ -5261,6 +5306,35 @@ def tiff_ccitt_nometa2_pdf(tmp_path_factory, tiff_ccitt_nometa2_img, request):
|
||||||
out_pdf.unlink()
|
out_pdf.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def miff_cmyk16_pdf(tmp_path_factory, miff_cmyk16_img, request):
    """Yield the path of the PDF that img2pdf creates from miff_cmyk16_img.

    The fixture is parametrized over the "internal" and "pikepdf" engines.
    The page content stream and the properties of the embedded image are
    checked with pikepdf before the path is handed to the tests. The PDF is
    removed again on teardown.
    """
    out_pdf = tmp_path_factory.mktemp("miff_cmyk16_pdf") / "out.pdf"
    command = [
        img2pdfprog,
        "--producer=",
        "--nodate",
        "--engine=" + request.param,
        "--output=" + str(out_pdf),
        str(miff_cmyk16_img),
    ]
    subprocess.check_call(command)

    with pikepdf.open(str(out_pdf)) as p:
        first_page = p.pages[0]
        expected_content = b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
        assert first_page.Contents.read_bytes() == expected_content
        embedded = first_page.Resources.XObject.Im0
        assert embedded.BitsPerComponent == 16
        assert embedded.ColorSpace == "/DeviceCMYK"
        assert embedded.Filter == "/FlateDecode"
        assert embedded.Height == 60
        assert embedded.Width == 60

    yield out_pdf
    out_pdf.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# TEST CASES #
|
# TEST CASES #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -6123,6 +6197,20 @@ def test_tiff_ccitt_nometa2(
|
||||||
compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf)
|
compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
    sys.platform in ["win32"],
    reason="test utilities not available on Windows and MacOS",
)
def test_miff_cmyk16(tmp_path_factory, miff_cmyk16_img, tiff_cmyk16_img, miff_cmyk16_pdf):
    """Compare the PDF made from the 16 bit CMYK MIFF with the CMYK TIFF.

    Both comparisons use tiff_cmyk16_img as the reference image and are
    run with exact=False.
    """
    workdir = tmp_path_factory.mktemp("miff_cmyk16")
    compare_ghostscript(
        workdir, tiff_cmyk16_img, miff_cmyk16_pdf, gsdevice="tiff32nc", exact=False
    )
    # not testing with poppler as it cannot write CMYK images
    compare_mupdf(workdir, tiff_cmyk16_img, miff_cmyk16_pdf, exact=False, cmyk=True)
    # NOTE: the compare_pdfimages_tiff(workdir, tiff_cmyk16_img, miff_cmyk16_pdf)
    # comparison is currently disabled
|
||||||
|
|
||||||
|
|
||||||
# we define some variables so that the table below can be narrower
|
# we define some variables so that the table below can be narrower
|
||||||
psl = (972, 504) # --pagesize landscape
|
psl = (972, 504) # --pagesize landscape
|
||||||
psp = (504, 972) # --pagesize portrait
|
psp = (504, 972) # --pagesize portrait
|
||||||
|
|
Loading…
Reference in a new issue