support for multi-frame images like multipage TIFF and animated GIF

This commit is contained in:
Johannes 'josch' Schauer 2016-02-17 00:30:15 +01:00
parent dbed55655a
commit edba669000

View file

@ -541,38 +541,14 @@ class pdfdoc(object):
self.writer.tostream(self.info, outputstream) self.writer.tostream(self.info, outputstream)
def read_image(rawdata, colorspace): def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
im = BytesIO(rawdata) if imgformat == ImageFormat.JPEG2000 and rawdata is not None:
im.seek(0)
try:
imgdata = Image.open(im)
except IOError as e:
# test if it is a jpeg2000 image
if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
raise ImageOpenError("cannot read input image (not jpeg2000). "
"PIL: error reading image: %s" % e)
# image is jpeg2000
imgwidthpx, imgheightpx, ics = parsejp2(rawdata) imgwidthpx, imgheightpx, ics = parsejp2(rawdata)
imgformat = ImageFormat.JPEG2000
# TODO: read real dpi from input jpeg2000 image # TODO: read real dpi from input jpeg2000 image
ndpi = (default_dpi, default_dpi) ndpi = (default_dpi, default_dpi)
logging.debug("input dpi = %d x %d", *ndpi)
if colorspace:
color = colorspace
logging.debug("input colorspace (forced) = %s", ics)
else:
color = ics
logging.debug("input colorspace = %s", ics)
else: else:
imgwidthpx, imgheightpx = imgdata.size imgwidthpx, imgheightpx = imgdata.size
imgformat = None
for f in ImageFormat:
if f.name == imgdata.format:
imgformat = f
if imgformat is None:
raise ValueError("unknown PIL image format: %s" % imgdata.format)
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi)) ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
# In python3, the returned dpi value for some tiff images will # In python3, the returned dpi value for some tiff images will
@ -581,6 +557,8 @@ def read_image(rawdata, colorspace):
# float into an integer by rounding. # float into an integer by rounding.
# Search online for the 72.009 dpi problem for more info. # Search online for the 72.009 dpi problem for more info.
ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
ics = imgdata.mode
logging.debug("input dpi = %d x %d", *ndpi) logging.debug("input dpi = %d x %d", *ndpi)
if colorspace: if colorspace:
@ -589,7 +567,7 @@ def read_image(rawdata, colorspace):
else: else:
color = None color = None
for c in Colorspace: for c in Colorspace:
if c.name == imgdata.mode: if c.name == ics:
color = c color = c
if color is None: if color is None:
raise ValueError("unknown PIL colorspace: %s" % imgdata.mode) raise ValueError("unknown PIL colorspace: %s" % imgdata.mode)
@ -606,40 +584,86 @@ def read_image(rawdata, colorspace):
logging.debug("input colorspace = %s", color.name) logging.debug("input colorspace = %s", color.name)
logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
return (color, ndpi, imgwidthpx, imgheightpx)
def read_images(rawdata, colorspace, first_frame_only=False):
im = BytesIO(rawdata)
im.seek(0)
try:
imgdata = Image.open(im)
except IOError as e:
# test if it is a jpeg2000 image
if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
raise ImageOpenError("cannot read input image (not jpeg2000). "
"PIL: error reading image: %s" % e)
# image is jpeg2000
imgformat = ImageFormat.JPEG2000
else:
imgformat = None
for f in ImageFormat:
if f.name == imgdata.format:
imgformat = f
if imgformat is None:
raise ValueError("unknown PIL image format: %s" % imgdata.format)
logging.debug("imgformat = %s", imgformat.name) logging.debug("imgformat = %s", imgformat.name)
# depending on the input format, determine whether to pass the raw # depending on the input format, determine whether to pass the raw
# image or the zlib compressed color information # image or the zlib compressed color information
if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata)
if color == Colorspace['1']: if color == Colorspace['1']:
raise MonochromeJpegError("jpeg can't be monochrome") raise MonochromeJpegError("jpeg can't be monochrome")
imgdata = rawdata im.close()
return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx)]
else: else:
result = []
img_page_count = 0
# loop through all frames of the image (example: multipage TIFF)
while True:
try:
imgdata.seek(img_page_count)
except EOFError:
break
if first_frame_only and img_page_count > 0:
break
logging.debug("Converting frame: %d" % img_page_count)
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace)
# because we do not support /CCITTFaxDecode # because we do not support /CCITTFaxDecode
if color == Colorspace['1']: if color == Colorspace['1']:
logging.debug("Converting colorspace 1 to L") logging.debug("Converting colorspace 1 to L")
imgdata = imgdata.convert('L') newimg = imgdata.convert('L')
color = Colorspace.L color = Colorspace.L
elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
Colorspace["CMYK;I"]]: Colorspace["CMYK;I"]]:
logging.debug("Colorspace is OK: %s", color) logging.debug("Colorspace is OK: %s", color)
newimg = imgdata
elif color in [Colorspace.RGBA]: elif color in [Colorspace.RGBA]:
logging.debug("Converting colorspace %s to RGB", color) logging.debug("Converting colorspace %s to RGB", color)
imgdata = imgdata.convert('RGB') newimg = imgdata.convert('RGB')
color = Colorspace.RGB color = Colorspace.RGB
else: else:
raise ValueError("unknown colorspace: %s" % color.name) raise ValueError("unknown colorspace: %s" % color.name)
img = imgdata.tobytes() imggz = zlib.compress(newimg.tobytes())
result.append((color, ndpi, imgformat, imggz, imgwidthpx,
imgheightpx))
img_page_count += 1
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
# close() method # close() method
try: try:
imgdata.close() imgdata.close()
except AttributeError: except AttributeError:
pass pass
imgdata = zlib.compress(img)
im.close() im.close()
return result
return color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx
# converts a length in pixels to a length in PDF units (1/72 of an inch) # converts a length in pixels to a length in PDF units (1/72 of an inch)
@ -881,7 +905,7 @@ def convert(*images, title=None,
viewer_initial_page=None, viewer_magnification=None, viewer_initial_page=None, viewer_magnification=None,
viewer_page_layout=None, viewer_fit_window=False, viewer_page_layout=None, viewer_fit_window=False,
viewer_center_window=False, viewer_fullscreen=False, viewer_center_window=False, viewer_fullscreen=False,
with_pdfrw=True, outputstream=None): with_pdfrw=True, outputstream=None, first_frame_only=False):
pdf = pdfdoc("1.3", title, author, creator, producer, creationdate, pdf = pdfdoc("1.3", title, author, creator, producer, creationdate,
moddate, subject, keywords, nodate, viewer_panes, moddate, subject, keywords, nodate, viewer_panes,
@ -906,22 +930,22 @@ def convert(*images, title=None,
# name so we now try treating it as raw image content # name so we now try treating it as raw image content
rawdata = img rawdata = img
color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx = \ for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx \
read_image(rawdata, colorspace) in read_images(rawdata, colorspace, first_frame_only):
pagewidth, pageheight, imgwidthpdf, imgheightpdf = \ pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
layout_fun(imgwidthpx, imgheightpx, ndpi) layout_fun(imgwidthpx, imgheightpx, ndpi)
if pagewidth < 3.00 or pageheight < 3.00: if pagewidth < 3.00 or pageheight < 3.00:
logging.warning("pdf width or height is below 3.00 - too small " logging.warning("pdf width or height is below 3.00 - too "
"for some viewers!") "small for some viewers!")
elif pagewidth > 14400.0 or pageheight > 14400.0: elif pagewidth > 14400.0 or pageheight > 14400.0:
raise PdfTooLargeError( raise PdfTooLargeError(
"pdf width or height must not exceed 200 inches.") "pdf width or height must not exceed 200 inches.")
# the image is always centered on the page # the image is always centered on the page
imgxpdf = (pagewidth - imgwidthpdf)/2.0 imgxpdf = (pagewidth - imgwidthpdf)/2.0
imgypdf = (pageheight - imgheightpdf)/2.0 imgypdf = (pageheight - imgheightpdf)/2.0
pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat, imgdata, pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
pagewidth, pageheight) imgypdf, pagewidth, pageheight)
if outputstream: if outputstream:
pdf.tostream(outputstream) pdf.tostream(outputstream)
@ -1369,6 +1393,14 @@ RGB.''')
"https://github.com/pmaupin/pdfrw/issues/39) or if you want the " "https://github.com/pmaupin/pdfrw/issues/39) or if you want the "
"PDF code to be more human readable.") "PDF code to be more human readable.")
outargs.add_argument(
"--first-frame-only", action="store_true",
help="By default, img2pdf will convert multi-frame images like "
"multi-page TIFF or animated GIF images to one page per frame. "
"This option will only let the first frame of every multi-frame "
"input image be converted into a page in the resulting PDF."
)
sizeargs = parser.add_argument_group( sizeargs = parser.add_argument_group(
title='Image and page size and layout arguments', title='Image and page size and layout arguments',
description='''\ description='''\
@ -1578,7 +1610,8 @@ values set via the --border option.
viewer_fit_window=args.viewer_fit_window, viewer_fit_window=args.viewer_fit_window,
viewer_center_window=args.viewer_center_window, viewer_center_window=args.viewer_center_window,
viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not
args.without_pdfrw, outputstream=args.output) args.without_pdfrw, outputstream=args.output,
first_frame_only=args.first_frame_only)
except Exception as e: except Exception as e:
logging.error("error: " + str(e)) logging.error("error: " + str(e))
if logging.getLogger().isEnabledFor(logging.DEBUG): if logging.getLogger().isEnabledFor(logging.DEBUG):