|
|
|
@ -128,7 +128,7 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
|
|
|
|
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
|
|
|
|
|
|
|
|
|
ImageFormat = Enum(
|
|
|
|
|
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
|
|
|
|
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF JBIG2 other"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
PageMode = Enum("PageMode", "none outlines thumbs")
|
|
|
|
@ -918,6 +918,11 @@ class pdfdoc(object):
|
|
|
|
|
self.output_version = "1.5" # jpeg2000 needs pdf 1.5
|
|
|
|
|
elif imgformat is ImageFormat.CCITTGroup4:
|
|
|
|
|
ofilter = [PdfName.CCITTFaxDecode]
|
|
|
|
|
elif imgformat is ImageFormat.JBIG2:
|
|
|
|
|
ofilter = PdfName.JBIG2Decode
|
|
|
|
|
# JBIG2Decode requires PDF 1.4
|
|
|
|
|
if self.output_version < "1.4":
|
|
|
|
|
self.output_version = "1.4"
|
|
|
|
|
else:
|
|
|
|
|
ofilter = PdfName.FlateDecode
|
|
|
|
|
|
|
|
|
@ -1308,6 +1313,25 @@ def get_imgmetadata(
|
|
|
|
|
if vdpi is None:
|
|
|
|
|
vdpi = default_dpi
|
|
|
|
|
ndpi = (hdpi, vdpi)
|
|
|
|
|
elif imgformat == ImageFormat.JBIG2:
|
|
|
|
|
imgwidthpx, imgheightpx, xres, yres = struct.unpack('>IIII', rawdata[24:40])
|
|
|
|
|
INCH_PER_METER = 39.370079
|
|
|
|
|
if xres == 0:
|
|
|
|
|
hdpi = default_dpi
|
|
|
|
|
elif xres < 1000:
|
|
|
|
|
# If xres is very small, it's likely accidentally expressed in dpi instead
|
|
|
|
|
# of dots per meter (dpm). See e.g. https://github.com/agl/jbig2enc/issues/86
|
|
|
|
|
hdpi = xres
|
|
|
|
|
else:
|
|
|
|
|
hdpi = int(float(xres) / INCH_PER_METER)
|
|
|
|
|
if yres == 0:
|
|
|
|
|
vdpi = default_dpi
|
|
|
|
|
elif yres < 1000:
|
|
|
|
|
vdpi = yres
|
|
|
|
|
else:
|
|
|
|
|
vdpi = int(float(yres) / INCH_PER_METER)
|
|
|
|
|
ndpi = (hdpi, vdpi)
|
|
|
|
|
ics = "1"
|
|
|
|
|
else:
|
|
|
|
|
imgwidthpx, imgheightpx = imgdata.size
|
|
|
|
|
|
|
|
|
@ -1334,7 +1358,7 @@ def get_imgmetadata(
|
|
|
|
|
|
|
|
|
|
# GIF and PNG files with transparency are supported
|
|
|
|
|
if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
|
|
|
|
|
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
|
|
|
|
ics in ["RGBA", "LA"] or (imgdata is not None and "transparency" in imgdata.info)
|
|
|
|
|
):
|
|
|
|
|
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
|
|
|
|
# convert 16-bit RGBA/LA images to 8-bit.
|
|
|
|
@ -1350,7 +1374,7 @@ def get_imgmetadata(
|
|
|
|
|
raise AlphaChannelError(
|
|
|
|
|
"Refusing to work with multiple >8bit channels."
|
|
|
|
|
)
|
|
|
|
|
elif ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
|
|
|
|
|
elif ics in ["LA", "PA", "RGBA"] or (imgdata is not None and "transparency" in imgdata.info):
|
|
|
|
|
raise AlphaChannelError("This function must not be called on images with alpha")
|
|
|
|
|
|
|
|
|
|
# Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
|
|
|
|
@ -1455,7 +1479,7 @@ def get_imgmetadata(
|
|
|
|
|
logger.debug("input colorspace = %s", color.name)
|
|
|
|
|
|
|
|
|
|
iccp = None
|
|
|
|
|
if "icc_profile" in imgdata.info:
|
|
|
|
|
if imgdata is not None and "icc_profile" in imgdata.info:
|
|
|
|
|
iccp = imgdata.info.get("icc_profile")
|
|
|
|
|
# GIMP saves bilevel TIFF images and palette PNG images with only black and
|
|
|
|
|
# white in the palette with an RGB ICC profile which is useless
|
|
|
|
@ -1805,8 +1829,6 @@ def parse_miff(data):
|
|
|
|
|
results.extend(parse_miff(rest[lenpal + lenimgdata :]))
|
|
|
|
|
return results
|
|
|
|
|
# fmt: on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_images(
|
|
|
|
|
rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
|
|
|
|
|
):
|
|
|
|
@ -1820,7 +1842,41 @@ def read_images(
|
|
|
|
|
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
|
|
|
|
# image is jpeg2000
|
|
|
|
|
imgformat = ImageFormat.JPEG2000
|
|
|
|
|
if rawdata[:14].lower() == b"id=imagemagick":
|
|
|
|
|
elif rawdata[:8] == b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a":
|
|
|
|
|
# For now we only support single-page generic coding of JBIG2, for example as generated by
|
|
|
|
|
# https://github.com/agl/jbig2enc
|
|
|
|
|
#
|
|
|
|
|
# In fact, you can pipe an example image like `src/tests/input/mono.png` directly into img2pdf:
|
|
|
|
|
# jbig2 src/tests/input/mono.png | img2pdf -o src/tests/output/mono.png.pdf
|
|
|
|
|
#
|
|
|
|
|
# For this we assume that the first 13 bytes are the JBIG2 file header describing a document with one page,
|
|
|
|
|
# followed by a "page information" segment describing the dimensions of that page.
|
|
|
|
|
#
|
|
|
|
|
# The following annotated `hexdump -C 042.jb2` shows the first 40 bytes that we inspect directly.
|
|
|
|
|
# The first 24 bytes (until "||") have to match exactly, while the following 16 bytes are read by get_imgmetadata.
|
|
|
|
|
#
|
|
|
|
|
# 97 4a 42 32 0d 0a 1a 0a 01 00 00 00 01 00 00 00
|
|
|
|
|
# \_____________________/ | \_________/ \______
|
|
|
|
|
# magic-bytes org/unk pages seg-num
|
|
|
|
|
#
|
|
|
|
|
# 00 30 00 01 00 00 00 13 || 00 00 00 73 00 00 00 30
|
|
|
|
|
# _/ | | | \_________/ || \_________/ \_________/
|
|
|
|
|
# type refs page seg-size || width-px height-px
|
|
|
|
|
#
|
|
|
|
|
# 00 00 00 48 00 00 00 48
|
|
|
|
|
# \_________/ \_________/
|
|
|
|
|
# xres yres
|
|
|
|
|
#
|
|
|
|
|
# For more information on the data format, see:
|
|
|
|
|
# * https://github.com/agl/jbig2enc/blob/ea05019/fcd14492.pdf
|
|
|
|
|
# For more information about the generic coding, see:
|
|
|
|
|
# * https://github.com/agl/jbig2enc/blob/ea05019/src/jbig2enc.cc#L898
|
|
|
|
|
imgformat = ImageFormat.JBIG2
|
|
|
|
|
if rawdata[:24] != b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a\x01\x00\x00\x00\x01\x00\x00\x00\x00\x30\x00\x01\x00\x00\x00\x13":
|
|
|
|
|
raise ImageOpenError(
|
|
|
|
|
"Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`)"
|
|
|
|
|
)
|
|
|
|
|
elif rawdata[:14].lower() == b"id=imagemagick":
|
|
|
|
|
# image is in MIFF format
|
|
|
|
|
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
|
|
|
|
# we need PIL but PIL cannot do 16 bit
|
|
|
|
@ -2066,6 +2122,28 @@ def read_images(
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
if imgformat == ImageFormat.JBIG2:
|
|
|
|
|
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
|
|
|
|
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
|
|
|
|
)
|
|
|
|
|
streamdata = rawdata[13:] # Strip file header
|
|
|
|
|
return [
|
|
|
|
|
(
|
|
|
|
|
color,
|
|
|
|
|
ndpi,
|
|
|
|
|
imgformat,
|
|
|
|
|
streamdata,
|
|
|
|
|
None,
|
|
|
|
|
imgwidthpx,
|
|
|
|
|
imgheightpx,
|
|
|
|
|
[],
|
|
|
|
|
False,
|
|
|
|
|
1,
|
|
|
|
|
rotation,
|
|
|
|
|
iccp,
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
if imgformat == ImageFormat.MIFF:
|
|
|
|
|
return parse_miff(rawdata)
|
|
|
|
|
|
|
|
|
|