@ -128,7 +128,7 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
Colorspace = Enum ( " Colorspace " , " RGB RGBA L LA 1 CMYK CMYK;I P PA other " )
ImageFormat = Enum (
" ImageFormat " , " JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
" ImageFormat " , " JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF JBIG2 other"
)
PageMode = Enum ( " PageMode " , " none outlines thumbs " )
@ -918,6 +918,11 @@ class pdfdoc(object):
self . output_version = " 1.5 " # jpeg2000 needs pdf 1.5
elif imgformat is ImageFormat . CCITTGroup4 :
ofilter = [ PdfName . CCITTFaxDecode ]
elif imgformat is ImageFormat . JBIG2 :
ofilter = PdfName . JBIG2Decode
# JBIG2Decode requires PDF 1.4
if self . output_version < " 1.4 " :
self . output_version = " 1.4 "
else :
ofilter = PdfName . FlateDecode
@ -1308,6 +1313,19 @@ def get_imgmetadata(
if vdpi is None :
vdpi = default_dpi
ndpi = ( hdpi , vdpi )
elif imgformat == ImageFormat . JBIG2 :
imgwidthpx , imgheightpx , xres , yres = struct . unpack ( ' >IIII ' , rawdata [ 24 : 40 ] )
INCH_PER_METER = 39.370079
if xres == 0 :
hdpi = default_dpi
else :
hdpi = int ( float ( xres ) / INCH_PER_METER )
if yres == 0 :
vdpi = default_dpi
else :
vdpi = int ( float ( yres ) / INCH_PER_METER )
ndpi = ( hdpi , vdpi )
ics = " 1 "
else :
imgwidthpx , imgheightpx = imgdata . size
@ -1334,7 +1352,7 @@ def get_imgmetadata(
# GIF and PNG files with transparency are supported
if imgformat in [ ImageFormat . PNG , ImageFormat . GIF , ImageFormat . JPEG2000 ] and (
ics in [ " RGBA " , " LA " ] or " transparency " in imgdata . info
ics in [ " RGBA " , " LA " ] or ( imgdata is not None and " transparency " in imgdata . info )
) :
# Must check the IHDR chunk for the bit depth, because PIL would lossily
# convert 16-bit RGBA/LA images to 8-bit.
@ -1350,7 +1368,7 @@ def get_imgmetadata(
raise AlphaChannelError (
" Refusing to work with multiple >8bit channels. "
)
elif ics in [ " LA " , " PA " , " RGBA " ] or " transparency " in imgdata . info :
elif ics in [ " LA " , " PA " , " RGBA " ] or ( imgdata is not None and " transparency " in imgdata . info ) :
raise AlphaChannelError ( " This function must not be called on images with alpha " )
# Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
@ -1455,7 +1473,7 @@ def get_imgmetadata(
logger . debug ( " input colorspace = %s " , color . name )
iccp = None
if " icc_profile " in imgdata . info :
if imgdata is not None and " icc_profile " in imgdata . info :
iccp = imgdata . info . get ( " icc_profile " )
# GIMP saves bilevel TIFF images and palette PNG images with only black and
# white in the palette with an RGB ICC profile which is useless
@ -1805,8 +1823,6 @@ def parse_miff(data):
results . extend ( parse_miff ( rest [ lenpal + lenimgdata : ] ) )
return results
# fmt: on
def read_images (
rawdata , colorspace , first_frame_only = False , rot = None , include_thumbnails = False
) :
@ -1820,7 +1836,42 @@ def read_images(
if rawdata [ : 12 ] == b " \x00 \x00 \x00 \x0C \x6A \x50 \x20 \x20 \x0D \x0A \x87 \x0A " :
# image is jpeg2000
imgformat = ImageFormat . JPEG2000
if rawdata [ : 14 ] . lower ( ) == b " id=imagemagick " :
elif rawdata [ : 8 ] == b " \x97 \x4a \x42 \x32 \x0d \x0a \x1a \x0a " :
# For now we only support single-page generic coding of JBIG2, for example as generated by
# https://github.com/agl/jbig2enc
#
# In fact, you can pipe an example image like 042.bmp from https://git.ghostscript.com/?p=tests.git;a=blob_plain;f=jbig2/042.bmp;hb=HEAD
# directly into img2pdf:
# jbig2 042.bmp | img2pdf > 042.pdf
#
# For this we assume that the first 13 bytes are the JBIG2 file header describing a document with one page,
# followed by a "page information" segment describing the dimensions of that page.
#
# The following annotated `hexdump -C 042.jb2` shows the first 40 bytes that we inspect directly.
# The first 24 bytes (until "||") have to match exactly, while the following 16 bytes are read by get_imgmetadata.
#
# 97 4a 42 32 0d 0a 1a 0a 01 00 00 00 01 00 00 00
# \_____________________/ | \_________/ \______
# magic-bytes org/unk pages seg-num
#
# 00 30 00 01 00 00 00 13 || 00 00 06 c0 00 00 09 23
# _/ | | | \_________/ || \_________/ \_________/
# type refs page seg-size || width-px height-px
#
# 00 00 00 00 00 00 00 00
# \_________/ \_________/
# xres yres
#
# For more information on the data format, see:
# * https://github.com/agl/jbig2enc/blob/ea05019/fcd14492.pdf
# For more information about the generic coding, see:
# * https://github.com/agl/jbig2enc/blob/ea05019/src/jbig2enc.cc#L898
imgformat = ImageFormat . JBIG2
if rawdata [ : 24 ] != b " \x97 \x4a \x42 \x32 \x0d \x0a \x1a \x0a \x01 \x00 \x00 \x00 \x01 \x00 \x00 \x00 \x00 \x30 \x00 \x01 \x00 \x00 \x00 \x13 " :
raise ImageOpenError (
" Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`) "
)
elif rawdata [ : 14 ] . lower ( ) == b " id=imagemagick " :
# image is in MIFF format
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
# we need PIL but PIL cannot do 16 bit
@ -2066,6 +2117,28 @@ def read_images(
)
]
if imgformat == ImageFormat . JBIG2 :
color , ndpi , imgwidthpx , imgheightpx , rotation , iccp = get_imgmetadata (
imgdata , imgformat , default_dpi , colorspace , rawdata , rot
)
streamdata = rawdata [ 13 : ] # Strip file header
return [
(
color ,
ndpi ,
imgformat ,
streamdata ,
None ,
imgwidthpx ,
imgheightpx ,
[ ] ,
False ,
1 ,
rotation ,
iccp ,
)
]
if imgformat == ImageFormat . MIFF :
return parse_miff ( rawdata )