@ -22,7 +22,7 @@ import sys
import os
import zlib
import argparse
from PIL import Image , TiffImagePlugin , GifImagePlugin
from PIL import Image , TiffImagePlugin , GifImagePlugin , ImageCms
if hasattr ( GifImagePlugin , " LoadingStrategy " ) :
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@ -36,8 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True
from PIL . ExifTags import TAGS
from datetime import datetime
from jp2 import parse jp2
from datetime import datetime , timezone
import jp2
from enum import Enum
from io import BytesIO
import logging
@ -45,6 +45,8 @@ import struct
import platform
import hashlib
from itertools import chain
import re
import io
logger = logging . getLogger ( __name__ )
@ -60,7 +62,7 @@ try:
except ImportError :
have_pikepdf = False
__version__ = " 0. 4.4 "
__version__ = " 0. 5.1 "
default_dpi = 96.0
papersizes = {
" letter " : " 8.5inx11in " ,
@ -125,7 +127,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
Colorspace = Enum ( " Colorspace " , " RGB RGBA L LA 1 CMYK CMYK;I P PA other " )
ImageFormat = Enum ( " ImageFormat " , " JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other " )
ImageFormat = Enum (
" ImageFormat " , " JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other "
)
PageMode = Enum ( " PageMode " , " none outlines thumbs " )
@ -442,7 +446,7 @@ class temp_attr:
if hasattr ( self . obj , self . field ) :
self . exists = True
self . old_value = getattr ( self . obj , self . field )
print ( f " setting { self . obj } . { self . field } = { self . value } " )
logger . debug ( f " setting { self . obj } . { self . field } = { self . value } " )
setattr ( self . obj , self . field , self . value )
def __exit__ ( self , exctype , excinst , exctb ) :
@ -718,7 +722,7 @@ class pdfdoc(object):
self . writer . docinfo = PdfDict ( indirect = True )
def datetime_to_pdfdate(dt):
    """Format *dt* as a PDF date string body (``YYYYMMDDHHMMSSZ``).

    The trailing ``Z`` claims UTC, so the value is converted to UTC before
    formatting. A naive datetime is interpreted as local time by
    ``astimezone()``.
    """
    return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
for k in [ " Title " , " Author " , " Creator " , " Producer " , " Subject " ] :
v = locals ( ) [ k . lower ( ) ]
@ -728,7 +732,7 @@ class pdfdoc(object):
v = PdfString . encode ( v )
self . writer . docinfo [ getattr ( PdfName , k ) ] = v
now = datetime . now ( )
now = datetime . now ( ) . astimezone ( )
for k in [ " CreationDate " , " ModDate " ] :
v = locals ( ) [ k . lower ( ) ]
if v is None and nodate :
@ -748,7 +752,7 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
    """Format *dt* as an XMP timestamp (``YYYY-MM-DDTHH:MM:SSZ``).

    The trailing ``Z`` claims UTC, so the value is converted to UTC before
    formatting. A naive datetime is interpreted as local time by
    ``astimezone()``.
    """
    return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self . xmp = b """ <?xpacket begin= ' \xef \xbb \xbf ' id= ' W5M0MpCehiHzreSzNTczkc9d ' ?>
< x : xmpmeta xmlns : x = ' adobe:ns:meta/ ' x : xmptk = ' XMP toolkit 2.9.1-13, framework 1.6 ' >
@ -823,8 +827,10 @@ class pdfdoc(object):
artborder = None ,
iccp = None ,
) :
assert ( color != Colorspace . RGBA and color != Colorspace . LA ) or (
imgformat == ImageFormat . PNG and smaskdata is not None
assert (
color not in [ Colorspace . RGBA , Colorspace . LA ]
or ( imgformat == ImageFormat . PNG and smaskdata is not None )
or imgformat == ImageFormat . JPEG2000
)
if self . engine == Engine . pikepdf :
@ -848,7 +854,13 @@ class pdfdoc(object):
if color == Colorspace [ " 1 " ] or color == Colorspace . L or color == Colorspace . LA :
colorspace = PdfName . DeviceGray
elif color == Colorspace . RGB or color == Colorspace . RGBA :
colorspace = PdfName . DeviceRGB
if color == Colorspace . RGBA and imgformat == ImageFormat . JPEG2000 :
# there is no DeviceRGBA and for JPXDecode it is okay to have
# no colorspace as the pdf reader is supposed to get this info
# from the jpeg2000 payload itself
colorspace = None
else :
colorspace = PdfName . DeviceRGB
elif color == Colorspace . CMYK or color == Colorspace [ " CMYK;I " ] :
colorspace = PdfName . DeviceCMYK
elif color == Colorspace . P :
@ -919,7 +931,8 @@ class pdfdoc(object):
image [ PdfName . Filter ] = ofilter
image [ PdfName . Width ] = imgwidthpx
image [ PdfName . Height ] = imgheightpx
image [ PdfName . ColorSpace ] = colorspace
if colorspace is not None :
image [ PdfName . ColorSpace ] = colorspace
image [ PdfName . BitsPerComponent ] = depth
smask = None
@ -1256,8 +1269,11 @@ class pdfdoc(object):
# now write out the PDF
if self . engine == Engine . pikepdf :
kwargs = { }
if pikepdf . __version__ > = " 6.2.0 " :
kwargs [ " deterministic_id " ] = True
self . writer . save (
outputstream , min_version = self . output_version , linearize = True
outputstream , min_version = self . output_version , linearize = True , * * kwargs
)
elif self . engine == Engine . pdfrw :
self . writer . trailer . Info = self . writer . docinfo
@ -1285,7 +1301,7 @@ def get_imgmetadata(
if imgformat == ImageFormat . JPEG2000 and rawdata is not None and imgdata is None :
# this codepath gets called if the PIL installation is not able to
# handle JPEG2000 files
imgwidthpx , imgheightpx , ics , hdpi , vdpi = parsejp2 ( rawdata )
imgwidthpx , imgheightpx , ics , hdpi , vdpi , channels , bpp = jp2. parse( rawdata )
if hdpi is None :
hdpi = default_dpi
@ -1295,7 +1311,19 @@ def get_imgmetadata(
else :
imgwidthpx , imgheightpx = imgdata . size
ndpi = imgdata . info . get ( " dpi " , ( default_dpi , default_dpi ) )
ndpi = imgdata . info . get ( " dpi " )
if ndpi is None :
# the PNG plugin of PIL adds the undocumented "aspect" field instead of
# the "dpi" field if the PNG pHYs chunk unit is not set to meters
if imgformat == ImageFormat . PNG and imgdata . info . get ( " aspect " ) is not None :
aspect = imgdata . info [ " aspect " ]
# make sure not to go below the default dpi
if aspect [ 0 ] > aspect [ 1 ] :
ndpi = ( default_dpi * aspect [ 0 ] / aspect [ 1 ] , default_dpi )
else :
ndpi = ( default_dpi , default_dpi * aspect [ 1 ] / aspect [ 0 ] )
else :
ndpi = ( default_dpi , default_dpi )
# In python3, the returned dpi value for some tiff images will
# not be an integer but a float. To make the behaviour of
# img2pdf the same between python2 and python3, we convert that
@ -1305,7 +1333,7 @@ def get_imgmetadata(
ics = imgdata . mode
# GIF and PNG files with transparency are supported
if ( imgformat == ImageFormat . PNG or imgformat == ImageFormat . GIF ) and (
if imgformat in [ ImageFormat . PNG , ImageFormat . GIF , ImageFormat . JPEG2000 ] and (
ics in [ " RGBA " , " LA " ] or " transparency " in imgdata . info
) :
# Must check the IHDR chunk for the bit depth, because PIL would lossily
@ -1315,6 +1343,10 @@ def get_imgmetadata(
if depth > 8 :
logger . warning ( " Image with transparency and a bit depth of %d . " % depth )
logger . warning ( " This is unsupported due to PIL limitations. " )
logger . warning (
" If you accept a lossy conversion, you can manually convert "
" your images to 8 bit using `convert -depth 8` from imagemagick "
)
raise AlphaChannelError (
" Refusing to work with multiple >8bit channels. "
)
@ -1425,6 +1457,53 @@ def get_imgmetadata(
iccp = None
if " icc_profile " in imgdata . info :
iccp = imgdata . info . get ( " icc_profile " )
# GIMP saves bilevel TIFF images and palette PNG images with only black and
# white in the palette with an RGB ICC profile which is useless
# https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
# imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
if iccp is not None and (
( color == Colorspace [ " 1 " ] and imgformat == ImageFormat . TIFF )
or (
imgformat == ImageFormat . PNG
and color == Colorspace . P
and rawdata is not None
and parse_png ( rawdata ) [ 1 ]
in [ b " \x00 \x00 \x00 \xff \xff \xff " , b " \xff \xff \xff \x00 \x00 \x00 " ]
)
) :
with io . BytesIO ( iccp ) as f :
prf = ImageCms . ImageCmsProfile ( f )
if (
prf . profile . model == " sRGB "
and prf . profile . manufacturer == " GIMP "
and prf . profile . profile_description == " GIMP built-in sRGB "
) :
if imgformat == ImageFormat . TIFF :
logger . warning (
" Ignoring RGB ICC profile in bilevel TIFF produced by GIMP. "
)
elif imgformat == ImageFormat . PNG :
logger . warning (
" Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP. "
)
logger . warning ( " https://gitlab.gnome.org/GNOME/gimp/-/issues/3438 " )
iccp = None
# SmartAlbums old version (found 2.2.6) exports JPG with only 1 compone
# with an RGB ICC profile which is useless.
# This produces an error in Adobe Acrobat, so we ignore it with a warning.
# Update: Found another case, the JPG is created by Adobe PhotoShop, so we
# don't check software anymore.
if iccp is not None and (
( color == Colorspace [ " L " ] and imgformat == ImageFormat . JPEG )
) :
with io . BytesIO ( iccp ) as f :
prf = ImageCms . ImageCmsProfile ( f )
if prf . profile . xcolor_space not in ( " GRAY " ) :
logger . warning ( " Ignoring non-GRAY ICC profile in Grayscale JPG " )
iccp = None
logger . debug ( " width x height = %d px x %d px " , imgwidthpx , imgheightpx )
@ -1533,7 +1612,204 @@ def parse_png(rawdata):
return pngidat , palette
def read_images ( rawdata , colorspace , first_frame_only = False , rot = None ) :
# Matches a single "key=value" field of a MIFF header.
# Note that re.VERBOSE ignores whitespace everywhere EXCEPT inside character
# classes, so the classes below must not contain any padding.
miff_re = re.compile(
    r"""
    [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
    [^=]+                 # the field name can even contain spaces
    =                     # field name and value are separated by an equal sign
    (?:
        [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
        |\{[^}]*\}              # or any kind of char surrounded by braces
    )+""",
    re.VERBOSE,
)
# https://imagemagick.org/script/miff.php
# turn off black formatting until python 3.10 is available on more platforms
# and we can use match/case
# fmt: off
def parse_miff(data):
    """Parse one or more concatenated ImageMagick MIFF images.

    *data* is the raw file content as bytes. The textual header is separated
    from the pixel payload by the byte sequence ``:\\x1a``.

    Returns a list with one 12-tuple per image found:
    ``(colorspace, (xdpi, ydpi), imgformat, imgdata, smask, width, height,
    palette, inverted, depth, rotation, iccprofile)``.

    Header inconsistencies are reported via ``AssertionError`` (unchanged
    from the original implementation). Unsupported header values are reported
    through the module logger instead of being printed to standard output,
    because standard output may carry the generated PDF.
    """
    results = []
    header, rest = data.split(b":\x1a", 1)
    header = header.decode("ISO-8859-1")
    assert header.lower().startswith("id=imagemagick")

    # collect the header fields we understand
    hdata = {}
    for i, line in enumerate(miff_re.findall(header)):
        if not line:
            continue
        k, v = line.split("=", 1)
        key = k.lower()
        if i == 0:
            # the very first field must identify the format
            assert key == "id"
            assert v.lower() == "imagemagick"
        if key == "class":
            if v in ["DirectClass", "PseudoClass"]:
                hdata["class"] = v
            else:
                logger.warning("cannot understand class %s", v)
        elif key == "colorspace":
            # theoretically RGBA and CMYKA should be supported as well
            # please teach me how to create such a MIFF file
            if v in ["sRGB", "CMYK", "Gray"]:
                hdata["colorspace"] = v
            else:
                logger.warning("cannot understand colorspace %s", v)
        elif key == "depth":
            if v in ["8", "16", "32"]:
                hdata["depth"] = int(v)
            else:
                logger.warning("cannot understand depth %s", v)
        elif key == "colors":
            hdata["colors"] = int(v)
        elif key == "matte":
            if v == "True":
                hdata["matte"] = True
            elif v == "False":
                hdata["matte"] = False
            else:
                logger.warning("cannot understand matte %s", v)
        elif key in ["columns", "rows"]:
            hdata[key] = int(v)
        elif key == "compression":
            logger.warning("compression not yet supported")
        elif key == "profile":
            assert v in ["icc", "exif"]
            hdata["profile"] = v
        elif key == "resolution":
            dpix, dpiy = v.split("x", 1)
            hdata["resolution"] = (float(dpix), float(dpiy))

    assert "depth" in hdata
    assert "columns" in hdata
    assert "rows" in hdata

    if hdata["class"] == "DirectClass":
        if "colors" in hdata:
            assert hdata["colors"] == 0
        if hdata["colorspace"] == "sRGB":
            numchannels = 3
            colorspace = Colorspace.RGB
        elif hdata["colorspace"] == "CMYK":
            numchannels = 4
            colorspace = Colorspace.CMYK
        elif hdata["colorspace"] == "Gray":
            numchannels = 1
            colorspace = Colorspace.L
        if hdata.get("matte"):
            numchannels += 1
        if hdata.get("profile"):
            # there is no key encoding the length of icc or exif data
            # according to the docs, the profile-icc key is supposed to do this
            logger.warning("FAIL: exif")
        else:
            lenimgdata = (
                hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
            )
            assert len(rest) >= lenimgdata, (
                len(rest),
                hdata["depth"],
                numchannels,
                hdata["columns"],
                hdata["rows"],
                lenimgdata,
            )
            if colorspace == Colorspace.RGB and hdata["depth"] == 8:
                # 8 bit RGB can be handled by PIL, so store it as PNG data
                newimg = Image.frombytes(
                    "RGB", (hdata["columns"], hdata["rows"]), rest[:lenimgdata]
                )
                imgdata, palette, depth = to_png_data(newimg)
                assert palette == b""
                assert depth == hdata["depth"]
                imgfmt = ImageFormat.PNG
            else:
                imgdata = zlib.compress(rest[:lenimgdata])
                imgfmt = ImageFormat.MIFF
            results.append(
                (
                    colorspace,
                    hdata.get("resolution") or (default_dpi, default_dpi),
                    imgfmt,
                    imgdata,
                    None,  # smask
                    hdata["columns"],
                    hdata["rows"],
                    [],  # palette
                    False,  # inverted
                    hdata["depth"],
                    0,  # rotation
                    None,  # icc profile
                )
            )
            if len(rest) > lenimgdata:
                # another image is here
                assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
                results.extend(parse_miff(rest[lenimgdata:]))
    elif hdata["class"] == "PseudoClass":
        assert "colors" in hdata
        if hdata.get("matte"):
            numchannels = 2
        else:
            numchannels = 1
        # the palette precedes the pixel data in the payload
        lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
        lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
        assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
        results.append(
            (
                Colorspace.RGB,
                hdata.get("resolution") or (default_dpi, default_dpi),
                ImageFormat.MIFF,
                zlib.compress(rest[lenpal : lenpal + lenimgdata]),
                None,  # FIXME: allow alpha channel smask
                hdata["columns"],
                hdata["rows"],
                rest[:lenpal],  # palette
                False,  # inverted
                hdata["depth"],
                0,  # rotation
                None,  # icc profile
            )
        )
        if len(rest) > lenpal + lenimgdata:
            # another image is here
            assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
                len(rest),
                lenpal,
                lenimgdata,
            )
            results.extend(parse_miff(rest[lenpal + lenimgdata :]))
    return results
# fmt: on
def read_images (
rawdata , colorspace , first_frame_only = False , rot = None , include_thumbnails = False
) :
im = BytesIO ( rawdata )
im . seek ( 0 )
imgdata = None
@ -1541,13 +1817,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgdata = Image . open ( im )
except IOError as e :
# test if it is a jpeg2000 image
if rawdata [ : 12 ] != b " \x00 \x00 \x00 \x0C \x6A \x50 \x20 \x20 \x0D \x0A \x87 \x0A " :
if rawdata [ : 12 ] == b " \x00 \x00 \x00 \x0C \x6A \x50 \x20 \x20 \x0D \x0A \x87 \x0A " :
# image is jpeg2000
imgformat = ImageFormat . JPEG2000
if rawdata [ : 14 ] . lower ( ) == b " id=imagemagick " :
# image is in MIFF format
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
# we need PIL but PIL cannot do 16 bit
imgformat = ImageFormat . MIFF
else :
raise ImageOpenError (
" cannot read input image (not jpeg2000). "
" PIL: error reading image: %s " % e
)
# image is jpeg2000
imgformat = ImageFormat . JPEG2000
else :
logger . debug ( " PIL format = %s " , imgdata . format )
imgformat = None
@ -1581,10 +1863,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
raise JpegColorspaceError ( " jpeg can ' t be monochrome " )
if color == Colorspace [ " P " ] :
raise JpegColorspaceError ( " jpeg can ' t have a color palette " )
if color == Colorspace [ " RGBA " ] :
if color == Colorspace [ " RGBA " ] and imgformat != ImageFormat . JPEG2000 :
raise JpegColorspaceError ( " jpeg can ' t have an alpha channel " )
logger . debug ( " read_images() embeds a JPEG " )
cleanup ( )
depth = 8
if imgformat == ImageFormat . JPEG2000 :
* _ , depth = jp2 . parse ( rawdata )
return [
(
color ,
@ -1596,7 +1881,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgheightpx ,
[ ] ,
False ,
8 ,
depth ,
rotation ,
iccp ,
)
@ -1613,6 +1898,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
if imgformat == ImageFormat . MPO :
result = [ ]
img_page_count = 0
assert len ( imgdata . _MpoImageFile__mpoffsets ) == len ( imgdata . mpinfo [ 0xB002 ] )
num_frames = len ( imgdata . mpinfo [ 0xB002 ] )
# An MPO file can be a main image together with one or more thumbnails
# if that is the case, then we only include all frames if the
# --include-thumbnails option is given. If it is not, such an MPO file
# will be embedded as is, so including its thumbnails but showing up
# as a single image page in the resulting PDF.
num_main_frames = 0
num_thumbnail_frames = 0
for i , mpent in enumerate ( imgdata . mpinfo [ 0xB002 ] ) :
# check only the first frame for being the main image
if (
i == 0
and mpent [ " Attribute " ] [ " DependentParentImageFlag " ]
and not mpent [ " Attribute " ] [ " DependentChildImageFlag " ]
and mpent [ " Attribute " ] [ " RepresentativeImageFlag " ]
and mpent [ " Attribute " ] [ " MPType " ] == " Baseline MP Primary Image "
) :
num_main_frames + = 1
elif (
not mpent [ " Attribute " ] [ " DependentParentImageFlag " ]
and mpent [ " Attribute " ] [ " DependentChildImageFlag " ]
and not mpent [ " Attribute " ] [ " RepresentativeImageFlag " ]
and mpent [ " Attribute " ] [ " MPType " ]
in [
" Large Thumbnail (VGA Equivalent) " ,
" Large Thumbnail (Full HD Equivalent) " ,
]
) :
num_thumbnail_frames + = 1
logger . debug ( f " number of frames: { num_frames } " )
logger . debug ( f " number of main frames: { num_main_frames } " )
logger . debug ( f " number of thumbnail frames: { num_thumbnail_frames } " )
# this MPO file is a main image plus zero or more thumbnails
# embed as-is unless the --include-thumbnails option was given
if num_frames == 1 or (
not include_thumbnails
and num_main_frames == 1
and num_thumbnail_frames + 1 == num_frames
) :
color , ndpi , imgwidthpx , imgheightpx , rotation , iccp = get_imgmetadata (
imgdata , imgformat , default_dpi , colorspace , rawdata , rot
)
if color == Colorspace [ " 1 " ] :
raise JpegColorspaceError ( " jpeg can ' t be monochrome " )
if color == Colorspace [ " P " ] :
raise JpegColorspaceError ( " jpeg can ' t have a color palette " )
if color == Colorspace [ " RGBA " ] :
raise JpegColorspaceError ( " jpeg can ' t have an alpha channel " )
logger . debug ( " read_images() embeds an MPO verbatim " )
cleanup ( )
return [
(
color ,
ndpi ,
ImageFormat . JPEG ,
rawdata ,
None ,
imgwidthpx ,
imgheightpx ,
[ ] ,
False ,
8 ,
rotation ,
iccp ,
)
]
# If the control flow reaches here, the MPO has more than a single
# frame but was not detected to be a main image followed by multiple
# thumbnails. We thus treat this MPO as we do other multi-frame images
# and include all its frames as individual pages.
for offset , mpent in zip (
imgdata . _MpoImageFile__mpoffsets , imgdata . mpinfo [ 0xB002 ]
) :
@ -1710,6 +2066,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
]
if imgformat == ImageFormat . MIFF :
return parse_miff ( rawdata )
# If our input is not JPEG or PNG, then we might have a format that
# supports multiple frames (like TIFF or GIF), so we need a loop to
# iterate through all frames of the image.
@ -1875,7 +2234,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
)
else :
if (
if color in [ Colorspace . P , Colorspace . PA ] and iccp is not None :
# PDF does not support palette images with icc profile
if color == Colorspace . P :
newcolor = Colorspace . RGB
newimg = newimg . convert ( mode = " RGB " )
elif color == Colorspace . PA :
newcolor = Colorspace . RGBA
newimg = newimg . convert ( mode = " RGBA " )
smaskidat = None
elif (
color == Colorspace . RGBA
or color == Colorspace . LA
or color == Colorspace . PA
@ -1889,25 +2257,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
newcolor = color
l , a = newimg . split ( )
newimg = l
elif color == Colorspace . PA or (
color == Colorspace . P and " transparency " in newimg . info
) :
newcolor = color
a = newimg . convert ( mode = " RGBA " ) . split ( ) [ - 1 ]
else :
newcolor = Colorspace . RGBA
r , g , b , a = newimg . convert ( mode = " RGBA " ) . split ( )
newimg = Image . merge ( " RGB " , ( r , g , b ) )
smaskidat , _ , _ = to_png_data ( a )
smaskidat , * _ = to_png_data ( a )
logger . warning (
" Image contains an alpha channel. Computing a separate "
" soft mask (/SMask) image to store transparency in PDF. "
)
elif color in [ Colorspace . P , Colorspace . PA ] and iccp is not None :
# PDF does not support palette images with icc profile
if color == Colorspace . P :
newcolor = Colorspace . RGB
newimg = newimg . convert ( mode = " RGB " )
elif color == Colorspace . PA :
newcolor = Colorspace . RGBA
newimg = newimg . convert ( mode = " RGBA " )
smaskidat = None
else :
newcolor = color
smaskidat = None
@ -2249,7 +2613,6 @@ def find_scale(pagewidth, pageheight):
# as a binary string representing the image content or as filenames to the
# images.
def convert ( * images , * * kwargs ) :
_default_kwargs = dict (
engine = None ,
title = None ,
@ -2279,6 +2642,7 @@ def convert(*images, **kwargs):
artborder = None ,
pdfa = None ,
rotation = None ,
include_thumbnails = False ,
)
for kwname , default in _default_kwargs . items ( ) :
if kwname not in kwargs :
@ -2322,11 +2686,16 @@ def convert(*images, **kwargs):
for img in images :
# img is allowed to be a path, a binary string representing image data
# or a file-like object (really anything that implements read())
try :
rawdata = img . read ( )
except AttributeError :
# or a pathlib.Path object (really anything that implements read_bytes())
rawdata = None
for fun in " read " , " read_bytes " :
try :
rawdata = getattr ( img , fun ) ( )
except AttributeError :
pass
if rawdata is None :
if not isinstance ( img , ( str , bytes ) ) :
raise TypeError ( " Neither implements read() nor is str or bytes " )
raise TypeError ( " Neither read(), read_bytes () nor is str or bytes" )
# the thing doesn't have a read() function, so try if we can treat
# it as a file name
try :
@ -2344,6 +2713,10 @@ def convert(*images, **kwargs):
rawdata = f . read ( )
f . close ( )
# md5 = hashlib.md5(rawdata).hexdigest()
# with open("./testdata/" + md5, "wb") as f:
# f.write(rawdata)
for (
color ,
ndpi ,
@ -2362,6 +2735,7 @@ def convert(*images, **kwargs):
kwargs [ " colorspace " ] ,
kwargs [ " first_frame_only " ] ,
kwargs [ " rotation " ] ,
kwargs [ " include_thumbnails " ] ,
) :
pagewidth , pageheight , imgwidthpdf , imgheightpdf = kwargs [ " layout_fun " ] (
imgwidthpx , imgheightpx , ndpi
@ -2737,7 +3111,7 @@ def valid_date(string):
else :
try :
return parser . parse ( string )
except TypeError :
except :
pass
# as a last resort, try the local date utility
try :
@ -2750,7 +3124,7 @@ def valid_date(string):
except subprocess . CalledProcessError :
pass
else :
return datetime . utc fromtimestamp( int ( utime ) )
return datetime . fromtimestamp( int ( utime ) )
raise argparse . ArgumentTypeError ( " cannot parse date: %s " % string )
@ -3452,7 +3826,35 @@ def gui():
app . mainloop ( )
def main ( argv = sys . argv ) :
def file_is_icc(fname):
    """Return True if the file *fname* carries the ICC profile signature.

    Only the first 40 bytes are read; the four bytes at offset 36 are
    compared against ``b"acsp"``. Files shorter than 40 bytes yield a
    shorter slice and therefore False. Errors from opening or reading the
    file (``OSError``) propagate to the caller.
    """
    with open(fname, "rb") as f:
        data = f.read(40)
    return data[36:40] == b"acsp"
def validate_icc(fname):
    """argparse ``type=`` callable: accept *fname* only if it is an ICC profile.

    Returns *fname* unchanged on success. Raises
    ``argparse.ArgumentTypeError`` if the file cannot be read or does not
    carry the ICC signature, so that argparse reports a regular usage error
    instead of an uncaught ``OSError`` traceback.
    """
    try:
        is_icc = file_is_icc(fname)
    except OSError as e:
        raise argparse.ArgumentTypeError(
            'cannot read ICC profile "%s": %s' % (fname, e)
        ) from e
    if not is_icc:
        raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
    return fname
def get_default_icc_profile():
    """Return the path of the default sRGB ICC profile.

    The well-known locations are tried in order and the first one that
    exists and carries the ICC signature is returned. If none qualifies,
    the first candidate path is returned regardless.
    """
    candidates = (
        "/usr/share/color/icc/sRGB.icc",
        "/usr/share/color/icc/OpenICC/sRGB.icc",
        "/usr/share/color/icc/colord/sRGB.icc",
    )
    for profile in candidates:
        if not os.path.exists(profile):
            continue
        try:
            if file_is_icc(profile):
                return profile
        except OSError:
            # an unreadable candidate must not abort building the argument
            # parser; just try the next location
            continue
    return candidates[0]
def get_main_parser ( ) :
rendered_papersizes = " "
for k , v in sorted ( papersizes . items ( ) ) :
rendered_papersizes + = " %-8s %s \n " % ( papernames [ k ] , v )
@ -3493,7 +3895,9 @@ Paper sizes:
the value in the second column has the same effect as giving the short hand
in the first column . Appending ^ T ( a caret / circumflex followed by the letter
T ) turns the paper size from portrait into landscape . The postfix thus
symbolizes the transpose . The values are case insensitive .
symbolizes the transpose . Note that on Windows cmd . exe the caret symbol is
the escape character , so you need to put quotes around the option value .
The values are case insensitive .
% s
@ -3560,7 +3964,7 @@ Examples:
while preserving its aspect ratio and a print border of 2 cm on the top and
bottom and 2.5 cm on the left and right hand side .
$ img2pdf - - output out . pdf - - pagesize A4 ^ T - - border 2 cm : 2.5 cm * . jpg
$ img2pdf - - output out . pdf - - pagesize " A4^T " - - border 2 cm : 2.5 cm * . jpg
On each A4 page , fit images into a 10 cm times 15 cm rectangle but keep the
original image size if the image is smaller than that .
@ -3695,6 +4099,17 @@ RGB.""",
" input image be converted into a page in the resulting PDF. " ,
)
outargs . add_argument (
" --include-thumbnails " ,
action = " store_true " ,
help = " Some multi-frame formats like MPO carry a main image and "
" one or more scaled-down copies of the main image (thumbnails). "
" In such a case, img2pdf will only include the main image and "
" not create additional pages for each of the thumbnails. If this "
" option is set, img2pdf will instead create one page per frame and "
" thus store each thumbnail on its own page. " ,
)
outargs . add_argument (
" --pillow-limit-break " ,
action = " store_true " ,
@ -3706,14 +4121,29 @@ RGB.""",
% Image . MAX_IMAGE_PIXELS ,
)
outargs . add_argument (
" --pdfa " ,
nargs = " ? " ,
const = " /usr/share/color/icc/sRGB.icc " ,
default = None ,
help = " Output a PDF/A-1b compliant document. By default, this will "
" embed /usr/share/color/icc/sRGB.icc as the color profile. " ,
)
if sys . platform == " win32 " :
# on Windows, there are no default paths to search for an ICC profile
# so make the argument required instead of optional
outargs . add_argument (
" --pdfa " ,
type = validate_icc ,
help = " Output a PDF/A-1b compliant document. The argument to this "
" option is the path to the ICC profile that will be embedded into "
" the resulting PDF. " ,
)
else :
outargs . add_argument (
" --pdfa " ,
nargs = " ? " ,
const = get_default_icc_profile ( ) ,
default = None ,
type = validate_icc ,
help = " Output a PDF/A-1b compliant document. By default, this will "
" embed either /usr/share/color/icc/sRGB.icc, "
" /usr/share/color/icc/OpenICC/sRGB.icc or "
" /usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
" is found to exist first. " ,
)
sizeargs = parser . add_argument_group (
title = " Image and page size and layout arguments " ,
@ -4002,8 +4432,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
action = " store_true " ,
help = " Instruct the PDF viewer to open the PDF in fullscreen mode " ,
)
return parser
args = parser . parse_args ( argv [ 1 : ] )
def main ( argv = sys . argv ) :
args = get_main_parser ( ) . parse_args ( argv [ 1 : ] )
if args . verbose :
logging . basicConfig ( level = logging . DEBUG )
@ -4027,7 +4460,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
elif len ( args . images ) == 0 and len ( args . from_file ) == 0 :
# if no positional arguments were supplied, read a single image from
# standard input
logger . info ( " reading image from standard input " )
print (
" Reading image from standard input... \n "
" Re-run with -h or --help for usage information. " ,
file = sys . stderr ,
)
try :
images = [ sys . stdin . buffer . read ( ) ]
except KeyboardInterrupt :
@ -4088,6 +4525,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
artborder = args . art_border ,
pdfa = args . pdfa ,
rotation = args . rotation ,
include_thumbnails = args . include_thumbnails ,
)
except Exception as e :
logger . error ( " error: " + str ( e ) )