better jp2 parsing based on jpylyzer
This commit is contained in:
parent
25a2178444
commit
610a5ecdd6
2 changed files with 97 additions and 10 deletions
17
img2pdf.py
17
img2pdf.py
|
@ -21,6 +21,7 @@ import zlib
|
||||||
import argparse
|
import argparse
|
||||||
import struct
|
import struct
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from jp2 import parsejp2
|
||||||
|
|
||||||
def parse(cont, indent=1):
|
def parse(cont, indent=1):
|
||||||
if type(cont) is dict:
|
if type(cont) is dict:
|
||||||
|
@ -95,23 +96,21 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
})
|
})
|
||||||
|
|
||||||
for im in images:
|
for im in images:
|
||||||
|
rawdata = im.read()
|
||||||
try:
|
try:
|
||||||
imgdata = Image.open(im)
|
imgdata = Image.open(im)
|
||||||
except IOError:
|
except IOError:
|
||||||
# test if it is a jpeg2000 image
|
# test if it is a jpeg2000 image
|
||||||
im.seek(0)
|
if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||||
if im.read(12) != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
|
||||||
print "cannot read input image"
|
print "cannot read input image"
|
||||||
exit(1)
|
exit(1)
|
||||||
# image is jpeg2000
|
# image is jpeg2000
|
||||||
|
width, height, ics = parsejp2(rawdata)
|
||||||
imgformat = "JP2"
|
imgformat = "JP2"
|
||||||
offset, = struct.unpack(">I", im.read(4))
|
|
||||||
im.seek(28+offset)
|
|
||||||
height, width = struct.unpack(">II", im.read(8))
|
|
||||||
if colorspace:
|
if colorspace:
|
||||||
color = colorspace
|
color = colorspace
|
||||||
else:
|
else:
|
||||||
color = "RGB" # TODO: read real colorspace
|
color = ics
|
||||||
if dpi:
|
if dpi:
|
||||||
dpi_x, dpi_y = dpi, dpi
|
dpi_x, dpi_y = dpi, dpi
|
||||||
else:
|
else:
|
||||||
|
@ -147,12 +146,10 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
# either embed the whole jpeg or deflate the bitmap representation
|
# either embed the whole jpeg or deflate the bitmap representation
|
||||||
if imgformat is "JPEG":
|
if imgformat is "JPEG":
|
||||||
ofilter = [ "/DCTDecode" ]
|
ofilter = [ "/DCTDecode" ]
|
||||||
im.seek(0)
|
imgdata = rawdata
|
||||||
imgdata = im.read()
|
|
||||||
elif imgformat is "JP2":
|
elif imgformat is "JP2":
|
||||||
ofilter = [ "/JPXDecode" ]
|
ofilter = [ "/JPXDecode" ]
|
||||||
im.seek(0)
|
imgdata = rawdata
|
||||||
imgdata = im.read()
|
|
||||||
version = 5 # jpeg2000 needs pdf 1.5
|
version = 5 # jpeg2000 needs pdf 1.5
|
||||||
else:
|
else:
|
||||||
ofilter = [ "/FlateDecode" ]
|
ofilter = [ "/FlateDecode" ]
|
||||||
|
|
90
jp2.py
Normal file
90
jp2.py
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
|
||||||
|
#
|
||||||
|
# this module is heavily based upon jpylyzer which is
|
||||||
|
# KB / National Library of the Netherlands, Open Planets Foundation
|
||||||
|
# and released under the same license conditions
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import struct
|
||||||
|
|
||||||
|
def getBox(data, byteStart, noBytes):
    """Read a single box from ``data`` beginning at offset ``byteStart``.

    The header consists of a 4-byte big-endian length followed by a 4-byte
    box type.  A length field of 1 means the actual length is stored in the
    next 8 bytes (big-endian); a length of 0 (after that step) means the box
    runs up to ``noBytes``.

    Returns a tuple ``(boxLength, boxType, byteEnd, boxContents)`` where
    ``byteEnd`` equals ``byteStart + boxLength`` and ``boxContents`` is the
    slice of ``data`` from the end of the header up to ``byteEnd``.

    ``struct.error`` propagates if the header bytes are not all present.
    """
    (length,) = struct.unpack(">I", data[byteStart:byteStart + 4])
    kind = data[byteStart + 4:byteStart + 8]
    header_size = 8
    if length == 1:
        # extended form: the real length is the 8-byte field after the type
        (length,) = struct.unpack(">Q", data[byteStart + 8:byteStart + 16])
        header_size = 16
    if length == 0:
        # open-ended box: it covers everything up to noBytes
        length = noBytes - byteStart
    end = byteStart + length
    return (length, kind, end, data[byteStart + header_size:end])
|
||||||
|
|
||||||
|
def parse_ihdr(data):
    """Return ``(width, height)`` read from the contents of an 'ihdr' box.

    The first eight bytes are interpreted as two big-endian unsigned 32-bit
    integers, height first and width second.  Any bytes after those eight
    are ignored.  ``struct.error`` propagates if fewer than eight bytes are
    supplied.
    """
    rows, cols = struct.unpack(">II", data[:8])
    return cols, rows
|
||||||
|
|
||||||
|
def parse_colr(data):
    """Return the colorspace name encoded in the contents of a 'colr' box.

    Byte 0 is the specification method; only method 1 (enumerated
    colorspace) is handled.  Bytes 1 and 2 are not used here.  Bytes 3-6
    hold the enumerated colorspace as a big-endian unsigned 32-bit integer.

    Returns ``"RGB"`` for enumerated value 16 and ``"L"`` for 17.

    Raises ``Exception`` for any other method or enumerated value, and
    ``struct.error`` if the box is too short to contain the fields.
    """
    meth = struct.unpack(">B", data[0:1])[0]
    if meth != 1:
        raise Exception("only enumerated color method supported")
    # Slice exactly four bytes: unpacking data[3:] fails with struct.error
    # as soon as the box carries anything after the enumerated value.
    enumCS = struct.unpack(">I", data[3:7])[0]
    if enumCS == 16:
        return "RGB"
    elif enumCS == 17:
        return "L"
    else:
        raise Exception("only sRGB and greyscale color space is supported, got %d"%enumCS)
|
||||||
|
|
||||||
|
def parse_jp2h(data):
    """Scan the boxes nested inside a 'jp2h' box for size and colorspace.

    ``data`` is the contents of the 'jp2h' box.  Each child box is read with
    ``getBox``; an 'ihdr' box supplies width and height via ``parse_ihdr``
    and a 'colr' box supplies the colorspace via ``parse_colr``.  Other box
    types are skipped.

    Returns ``(width, height, colorspace)``; an entry stays ``None`` when
    the corresponding box was not encountered.
    """
    width, height, colorspace = None, None, None
    noBytes = len(data)
    byteStart = 0
    boxLengthValue = 1  # dummy value for while loop condition
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
        # Compare against bytes literals: box types are slices of the raw
        # file data, which never equal text strings on Python 3 (on
        # Python 2 a bytes literal is an ordinary str, so nothing changes).
        if boxType == b'ihdr':
            width, height = parse_ihdr(boxContents)
        elif boxType == b'colr':
            colorspace = parse_colr(boxContents)
        byteStart = byteEnd
    return (width, height, colorspace)
|
||||||
|
|
||||||
|
def parsejp2(data):
    """Return ``(width, height, colorspace)`` for a JP2 file held in ``data``.

    The top-level boxes of ``data`` are walked with ``getBox``; the contents
    of a 'jp2h' box are handed to ``parse_jp2h`` to obtain the three values.

    Raises ``Exception`` with a descriptive message when width, height or
    colorspace could not be determined, including the case where no 'jp2h'
    box is present at all.
    """
    # Start from None so that a file without a 'jp2h' box reaches the
    # explicit checks below instead of failing with an unbound local.
    width, height, colorspace = None, None, None
    noBytes = len(data)
    byteStart = 0
    boxLengthValue = 1  # dummy value for while loop condition
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
        # bytes literal: the box type is a slice of the raw file data, which
        # never equals a text string on Python 3 (plain str on Python 2).
        if boxType == b'jp2h':
            width, height, colorspace = parse_jp2h(boxContents)
        byteStart = byteEnd
    if not width:
        raise Exception("no width in jp2 header")
    if not height:
        raise Exception("no height in jp2 header")
    if not colorspace:
        raise Exception("no colorspace in jp2 header")
    return (width, height, colorspace)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: print the width, height and colorspace of
    # the JP2 file named by the first argument.
    import sys
    # The file is binary data, so open it in "rb" mode; the with-block
    # closes the handle once the contents have been read.
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace = parsejp2(f.read())
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("width = %d" % width)
    print("height = %d" % height)
    print("colorspace = %s" % colorspace)
|
Loading…
Reference in a new issue