From 09064e8e70a0549147db172eb5b122e50479fd36 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Marin Rodrigues Date: Tue, 8 Aug 2023 07:40:38 +0200 Subject: [PATCH] jp2: rudimentary support for raw jpeg2000 without jp2 boxes --- src/img2pdf.py | 8 ++++---- src/jp2.py | 46 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index 904b647..68e94f4 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -37,7 +37,7 @@ if hasattr(GifImagePlugin, "LoadingStrategy"): # TiffImagePlugin.DEBUG = True from PIL.ExifTags import TAGS from datetime import datetime, timezone -from jp2 import parsejp2 +import jp2 from enum import Enum from io import BytesIO import logging @@ -1301,7 +1301,7 @@ def get_imgmetadata( if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: # this codepath gets called if the PIL installation is not able to # handle JPEG2000 files - imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = parsejp2(rawdata) + imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata) if hdpi is None: hdpi = default_dpi @@ -1843,7 +1843,7 @@ def read_images( cleanup() depth = 8 if imgformat == ImageFormat.JPEG2000: - _, _, _, _, _, _, depth = parsejp2(rawdata) + *_, depth = jp2.parse(rawdata) return [ ( color, @@ -2241,7 +2241,7 @@ def read_images( r, g, b, a = newimg.convert(mode="RGBA").split() newimg = Image.merge("RGB", (r, g, b)) - smaskidat, _, _ = to_png_data(a) + smaskidat, *_ = to_png_data(a) logger.warning( "Image contains an alpha channel. Computing a separate " "soft mask (/SMask) image to store transparency in PDF." 
diff --git a/src/jp2.py b/src/jp2.py
index d305d38..44d3e21 100644
--- a/src/jp2.py
+++ b/src/jp2.py
@@ -38,7 +38,7 @@ def getBox(data, byteStart, noBytes):
 
 def parse_ihdr(data):
     height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
-    return width, height, channels, bpp+1
+    return width, height, channels, bpp + 1
 
 
 def parse_colr(data):
@@ -58,8 +58,8 @@ def parse_colr(data):
 
 def parse_resc(data):
     hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
-    hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
-    vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
+    hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
+    vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
     return hdpi, vdpi
 
 
@@ -101,7 +101,9 @@ def parsejp2(data):
     while byteStart < noBytes and boxLengthValue != 0:
         boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
         if boxType == b"jp2h":
-            width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(boxContents)
+            width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
+                boxContents
+            )
             break
         byteStart = byteEnd
     if not width:
@@ -114,10 +116,55 @@ def parsejp2(data):
     return (width, height, colorspace, hdpi, vdpi, channels, bpp)
 
 
+def parsej2k(data):
+    """Parse the SIZ marker segment of a raw JPEG 2000 codestream.
+
+    Only 8-bit unsigned three-component images are supported. Returns the
+    same 7-tuple as parsejp2(); colorspace, hdpi and vdpi are None because a
+    raw codestream has no jp2 boxes to carry them.
+
+    Raises ValueError for any other component layout and struct.error if the
+    data is too short to hold the SIZ marker segment.
+    """
+    # data[0:4] holds the SOC and SIZ markers checked by parse(); the fixed
+    # part of the SIZ segment follows: Lsiz, Rsiz, Xsiz, Ysiz, XOsiz, YOsiz,
+    # XTsiz, YTsiz, XTOsiz, YTOsiz and Csiz.
+    lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
+        ">HHIIIIIIIIH", data[4:42]
+    )
+    # Each component contributes three bytes (Ssiz, XRsiz, YRsiz); only Ssiz
+    # is needed here, so the two subsampling bytes are skipped as padding.
+    ssiz = [struct.unpack_from("Bxx", data, 42 + 3 * i)[0] for i in range(csiz)]
+    # An explicit raise instead of assert, so the check survives python -O.
+    if ssiz != [7, 7, 7]:
+        raise ValueError(
+            "unsupported raw JPEG 2000 components (Ssiz=%r): only three "
+            "8-bit unsigned components are supported" % (ssiz,)
+        )
+    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
+
+
+def parse(data):
+    """Return image metadata for a raw codestream or a boxed jp2 file.
+
+    Data starting with the SOC and SIZ markers is handled by parsej2k();
+    everything else is handed to parsejp2().
+    """
+    if data[:4] == b"\xff\x4f\xff\x51":
+        return parsej2k(data)
+    return parsejp2(data)
+
+
 if __name__ == "__main__":
     import sys
 
-    width, height, colorspace = parsejp2(open(sys.argv[1]).read())
-    sys.stdout.write("width = %d" % width)
-    sys.stdout.write("height = %d" % height)
-    sys.stdout.write("colorspace = %s" % colorspace)
+    # Binary mode is required, and the context manager closes the file.
+    with open(sys.argv[1], "rb") as f:
+        width, height, colorspace, hdpi, vdpi, channels, bpp = parse(f.read())
+    print("width = %d" % width)
+    print("height = %d" % height)
+    print("colorspace = %s" % colorspace)
+    print("hdpi = %s" % hdpi)
+    print("vdpi = %s" % vdpi)
+    print("channels = %s" % channels)
+    print("bpp = %s" % bpp)