From d29c596fe79e4bccd986b0f9e045bab3dbab02dd Mon Sep 17 00:00:00 2001 From: Johannes Schauer Marin Rodrigues Date: Mon, 12 Apr 2021 22:57:51 +0200 Subject: [PATCH] add support for MPO images (closes: #93) --- src/img2pdf.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index a89f984..5588838 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -85,7 +85,7 @@ PageOrientation = Enum("PageOrientation", "portrait landscape") Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other") -ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF other") +ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF MPO other") PageMode = Enum("PageMode", "none outlines thumbs") @@ -1402,6 +1402,61 @@ def read_images(rawdata, colorspace, first_frame_only=False): ) ] + # The MPO format is multiple JPEG images concatenated together + # we use the offset and size information to dissect the MPO into its + # individual JPEG images and then embed those into the PDF individually. + # + # The downside is, that this truncates the first JPEG as the MPO metadata + # will still be in it but the referenced images are chopped off. We still + # do it that way instead of adding the full MPO as the first image to not + # store duplicate image data. + if imgformat == ImageFormat.MPO: + result = [] + img_page_count = 0 + for offset, mpent in zip( + imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002] + ): + if first_frame_only and img_page_count > 0: + break + rawframe = BytesIO(rawdata[offset : offset + mpent["Size"]]) + imframe = Image.open(rawframe) + # The first frame contains the data that makes the JPEG a MPO + # Could we thus embed an MPO into another MPO? Lets not support + # such madness ;) + if img_page_count > 0 and imframe.format != "JPEG": + raise Exception("MPO payload must be a JPEG %s", imframe.format) + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( + imframe, ImageFormat.JPEG, default_dpi, colorspace + ) + imframe.close() + rawframe.close() + if color == Colorspace["1"]: + raise JpegColorspaceError("jpeg can't be monochrome") + if color == Colorspace["P"]: + raise JpegColorspaceError("jpeg can't have a color palette") + if color == Colorspace["RGBA"]: + raise JpegColorspaceError("jpeg can't have an alpha channel") + logger.debug("read_images() embeds a JPEG from MPO") + result.append( + ( + color, + ndpi, + ImageFormat.JPEG, + rawdata[offset : offset + mpent["Size"]], + imgwidthpx, + imgheightpx, + [], + False, + 8, + rotation, + iccp, + ) + ) + img_page_count += 1 + imgdata.close() + im.close() + return result + # We can directly embed the IDAT chunk of PNG images if the PNG is not # interlaced #