From 610a5ecdd6255f87d7b704294921d1ece9b22dbf Mon Sep 17 00:00:00 2001 From: josch Date: Mon, 21 Oct 2013 15:55:47 +0200 Subject: [PATCH] better jp2 parsing based on jpylyzer --- img2pdf.py | 17 +++++------ jp2.py | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 jp2.py diff --git a/img2pdf.py b/img2pdf.py index 64c4a99..bb1f9f1 100755 --- a/img2pdf.py +++ b/img2pdf.py @@ -21,6 +21,7 @@ import zlib import argparse import struct from datetime import datetime +from jp2 import parsejp2 def parse(cont, indent=1): if type(cont) is dict: @@ -95,23 +96,21 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None, }) for im in images: + rawdata = im.read() try: imgdata = Image.open(im) except IOError: # test if it is a jpeg2000 image - im.seek(0) - if im.read(12) != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": + if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": print "cannot read input image" exit(1) # image is jpeg2000 + width, height, ics = parsejp2(rawdata) imgformat = "JP2" - offset, = struct.unpack(">I", im.read(4)) - im.seek(28+offset) - height, width = struct.unpack(">II", im.read(8)) if colorspace: color = colorspace else: - color = "RGB" # TODO: read real colorspace + color = ics if dpi: dpi_x, dpi_y = dpi, dpi else: @@ -147,12 +146,10 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None, # either embed the whole jpeg or deflate the bitmap representation if imgformat is "JPEG": ofilter = [ "/DCTDecode" ] - im.seek(0) - imgdata = im.read() + imgdata = rawdata elif imgformat is "JP2": ofilter = [ "/JPXDecode" ] - im.seek(0) - imgdata = im.read() + imgdata = rawdata version = 5 # jpeg2000 needs pdf 1.5 else: ofilter = [ "/FlateDecode" ] diff --git a/jp2.py b/jp2.py new file mode 100644 index 0000000..addfe5d --- /dev/null +++ b/jp2.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# +# Copyright (C) 2013 Johannes 'josch' 
# Copyright (C) 2013 Johannes 'josch' Schauer
#
# this module is heavily based upon jpylyzer which is
# KB / National Library of the Netherlands, Open Planets Foundation
# and released under the same license conditions
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.

"""Minimal JPEG 2000 (JP2) container parser.

Walks the box structure of a JP2 file held in memory and extracts the image
width, height and colour space from the JP2 header ('jp2h') superbox.  All
functions expect the raw file contents as a byte string (``str`` on
Python 2, ``bytes`` on Python 3).
"""

import struct


def getBox(data, byteStart, noBytes):
    """Read the box that starts at offset byteStart of data.

    data      -- byte string containing the box
    byteStart -- offset of the first byte of the box header
    noBytes   -- total length of data; used when the box declares length 0

    Returns the tuple (boxLengthValue, boxType, byteEnd, boxContents) where
    boxLengthValue is the effective box length including its header, boxType
    is the 4-byte type field, byteEnd is the offset just past the box and
    boxContents is the payload without the header.

    Raises struct.error if data is too short to hold the box header.
    """
    boxLengthValue = struct.unpack_from(">I", data, byteStart)[0]
    boxType = data[byteStart+4:byteStart+8]
    contentsStartOffset = 8
    if boxLengthValue == 1:
        # a length field of 1 means the real length is stored as a 64 bit
        # value right after the type field
        boxLengthValue = struct.unpack_from(">Q", data, byteStart+8)[0]
        contentsStartOffset = 16
    if boxLengthValue == 0:
        # a length of 0 means the box extends to the end of the data
        boxLengthValue = noBytes-byteStart
    byteEnd = byteStart + boxLengthValue
    boxContents = data[byteStart+contentsStartOffset:byteEnd]
    return (boxLengthValue, boxType, byteEnd, boxContents)


def _iterBoxes(data):
    """Yield (boxType, boxContents) for each consecutive box in data."""
    noBytes = len(data)
    byteStart = 0
    # getBox always reports a positive length while byteStart < noBytes
    # (a stored 0 is replaced by the remaining size), so byteStart strictly
    # increases and the loop terminates.
    while byteStart < noBytes:
        boxLengthValue, boxType, byteEnd, boxContents = getBox(
            data, byteStart, noBytes)
        yield boxType, boxContents
        byteStart = byteEnd


def parse_ihdr(data):
    """Return (width, height) from the contents of an image header box.

    The box stores the height first and the width second, each as a
    big-endian unsigned 32 bit integer.

    Raises struct.error if data holds fewer than 8 bytes.
    """
    height, width = struct.unpack_from(">II", data)
    return width, height


def parse_colr(data):
    """Return the colour space named by the contents of a 'colr' box.

    Returns "RGB" for enumerated colour space 16 and "L" for 17.

    Raises Exception if the box does not use the enumerated method (1) or
    names any other enumerated colour space, and struct.error if data is
    too short.
    """
    meth = struct.unpack_from(">B", data)[0]
    if meth != 1:
        raise Exception("only enumerated color method supported")
    # Read exactly the four bytes following METH, PREC and APPROX instead of
    # "everything from offset 3", so extra trailing bytes in the box do not
    # make the unpack fail.
    enumCS = struct.unpack_from(">I", data, 3)[0]
    if enumCS == 16:
        return "RGB"
    elif enumCS == 17:
        return "L"
    else:
        raise Exception("only sRGB and greyscale color space is supported, got %d"%enumCS)


def parse_jp2h(data):
    """Return (width, height, colorspace) from the contents of a 'jp2h' box.

    Any of the three values is None if the corresponding sub-box ('ihdr'
    for width and height, 'colr' for the colour space) is missing.
    """
    width, height, colorspace = None, None, None
    for boxType, boxContents in _iterBoxes(data):
        if boxType == b'ihdr':
            width, height = parse_ihdr(boxContents)
        elif boxType == b'colr' and colorspace is None:
            # only the first colour specification box is honoured; later
            # ones must neither override it nor abort the parse
            colorspace = parse_colr(boxContents)
    return (width, height, colorspace)


def parsejp2(data):
    """Return (width, height, colorspace) of the JP2 file contents in data.

    colorspace is "RGB" or "L".

    Raises Exception if the file has no 'jp2h' box or if that box lacks a
    width, height or colour space.
    """
    # Initialise up front so that a file without any 'jp2h' box yields the
    # descriptive exceptions below instead of an UnboundLocalError.
    width, height, colorspace = None, None, None
    for boxType, boxContents in _iterBoxes(data):
        if boxType == b'jp2h':
            width, height, colorspace = parse_jp2h(boxContents)
            # everything needed is in the header box; stopping here avoids
            # copying the (large) codestream box and tripping over trailing
            # bytes after it
            break
    if not width:
        raise Exception("no width in jp2 header")
    if not height:
        raise Exception("no height in jp2 header")
    if not colorspace:
        raise Exception("no colorspace in jp2 header")
    return (width, height, colorspace)


if __name__ == "__main__":
    import sys
    # JP2 is a binary format: read it in binary mode and close the file
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace = parsejp2(f.read())
    print("width = %d"%width)
    print("height = %d"%height)
    print("colorspace = %s"%colorspace)