major refactoring
This commit is contained in:
parent
50aff1474f
commit
46a510d2b9
1 changed files with 158 additions and 138 deletions
296
img2pdf.py
296
img2pdf.py
|
@ -29,7 +29,7 @@ def parse(cont, indent=1):
|
||||||
elif type(cont) is int or type(cont) is float:
|
elif type(cont) is int or type(cont) is float:
|
||||||
return str(cont)
|
return str(cont)
|
||||||
elif isinstance(cont, obj):
|
elif isinstance(cont, obj):
|
||||||
return "%d 0 R"%cont.get_identifier()
|
return "%d 0 R"%cont.identifier
|
||||||
elif type(cont) is str:
|
elif type(cont) is str:
|
||||||
return cont
|
return cont
|
||||||
elif type(cont) is list:
|
elif type(cont) is list:
|
||||||
|
@ -40,117 +40,73 @@ class obj():
|
||||||
self.content = content
|
self.content = content
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
|
|
||||||
def tostring(self, identifier):
|
def tostring(self):
|
||||||
self.identifier = identifier
|
|
||||||
if self.stream:
|
if self.stream:
|
||||||
return "%d 0 obj "%identifier+parse(self.content)+"\nstream\n"+self.stream+"\nendstream\nendobj\n"
|
return "%d 0 obj "%self.identifier+parse(self.content)+"\nstream\n"+self.stream+"\nendstream\nendobj\n"
|
||||||
else:
|
else:
|
||||||
return "%d 0 obj "%identifier+parse(self.content)+" endobj\n"
|
return "%d 0 obj "%self.identifier+parse(self.content)+" endobj\n"
|
||||||
|
|
||||||
def get_identifier(self):
|
class pdfdoc():
|
||||||
if not hasattr(self, 'identifier'):
|
objects = list()
|
||||||
raise Exception("no id set yet, call tostring() on obj first")
|
|
||||||
return self.identifier
|
|
||||||
|
|
||||||
def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
def __init__(self, version=3, title=None, author=None, creator=None, producer=None,
|
||||||
creationdate=None, moddate=None, subject=None, keywords=None,
|
creationdate=None, moddate=None, subject=None, keywords=None):
|
||||||
colorspace=None, verbose=False):
|
self.version = version # default pdf version 1.3
|
||||||
|
now = datetime.now()
|
||||||
|
|
||||||
version = 3 # default pdf version 1.3
|
info = dict()
|
||||||
|
if title:
|
||||||
now = datetime.now()
|
info["/Title"] = "("+title+")"
|
||||||
|
if author:
|
||||||
def debug_out(message):
|
info["/Author"] = "("+author+")"
|
||||||
if verbose:
|
if creator:
|
||||||
sys.stderr.write("D: "+message+"\n")
|
info["/Creator"] = "("+creator+")"
|
||||||
def error_out(message):
|
if producer:
|
||||||
sys.stderr.write("E: "+message+"\n")
|
info["/Producer"] = "("+producer+")"
|
||||||
def warning_out(message):
|
if creationdate:
|
||||||
sys.stderr.write("W: "+message+"\n")
|
info["/CreationDate"] = "(D:"+creationdate.strftime("%Y%m%d%H%M%S")+")"
|
||||||
|
|
||||||
info = dict()
|
|
||||||
if title:
|
|
||||||
info["/Title"] = "("+title+")"
|
|
||||||
if author:
|
|
||||||
info["/Author"] = "("+author+")"
|
|
||||||
if creator:
|
|
||||||
info["/Creator"] = "("+creator+")"
|
|
||||||
if producer:
|
|
||||||
info["/Producer"] = "("+producer+")"
|
|
||||||
if creationdate:
|
|
||||||
info["/CreationDate"] = "(D:"+creationdate.strftime("%Y%m%d%H%M%S")+")"
|
|
||||||
else:
|
|
||||||
info["/CreationDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
|
|
||||||
if moddate:
|
|
||||||
info["/ModDate"] = "(D:"+moddate.strftime("%Y%m%d%H%M%S")+")"
|
|
||||||
else:
|
|
||||||
info["/ModDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
|
|
||||||
if subject:
|
|
||||||
info["/Subject"] = "("+subject+")"
|
|
||||||
if keywords:
|
|
||||||
info["/Keywords"] = "("+",".join(keywords)+")"
|
|
||||||
|
|
||||||
info = obj(info)
|
|
||||||
|
|
||||||
pagestuples = list()
|
|
||||||
|
|
||||||
# create an incomplete pages object so that a /Parent entry can be added to each page
|
|
||||||
pages = obj({
|
|
||||||
"/Type": "/Pages"
|
|
||||||
})
|
|
||||||
|
|
||||||
for im in images:
|
|
||||||
rawdata = im.read()
|
|
||||||
im.seek(0)
|
|
||||||
try:
|
|
||||||
imgdata = Image.open(im)
|
|
||||||
except IOError as e:
|
|
||||||
# test if it is a jpeg2000 image
|
|
||||||
if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
|
||||||
error_out("cannot read input image (not jpeg2000)")
|
|
||||||
error_out("PIL: %s"%e)
|
|
||||||
exit(1)
|
|
||||||
# image is jpeg2000
|
|
||||||
width, height, ics = parsejp2(rawdata)
|
|
||||||
imgformat = "JP2"
|
|
||||||
if colorspace:
|
|
||||||
color = colorspace
|
|
||||||
else:
|
|
||||||
color = ics
|
|
||||||
debug_out("input colorspace = %s"%(ics))
|
|
||||||
if dpi:
|
|
||||||
dpi_x, dpi_y = dpi, dpi
|
|
||||||
else:
|
|
||||||
dpi_x, dpi_y = (96, 96) # TODO: read real dpi
|
|
||||||
else:
|
else:
|
||||||
width, height = imgdata.size
|
info["/CreationDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
|
||||||
if dpi:
|
if moddate:
|
||||||
dpi_x, dpi_y = dpi, dpi
|
info["/ModDate"] = "(D:"+moddate.strftime("%Y%m%d%H%M%S")+")"
|
||||||
else:
|
else:
|
||||||
dpi_x, dpi_y = imgdata.info.get("dpi", (96, 96))
|
info["/ModDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
|
||||||
debug_out("input dpi = %d x %d"%(dpi_x,dpi_y))
|
if subject:
|
||||||
imgformat = imgdata.format
|
info["/Subject"] = "("+subject+")"
|
||||||
if colorspace:
|
if keywords:
|
||||||
color = colorspace
|
info["/Keywords"] = "("+",".join(keywords)+")"
|
||||||
else:
|
|
||||||
color = imgdata.mode
|
|
||||||
debug_out("input colorspace = %s"%(color))
|
|
||||||
|
|
||||||
debug_out("width x height = %d x %d"%(width,height))
|
self.info = obj(info)
|
||||||
|
|
||||||
|
# create an incomplete pages object so that a /Parent entry can be added to each page
|
||||||
|
self.pages = obj({
|
||||||
|
"/Type": "/Pages",
|
||||||
|
"/Kids": [],
|
||||||
|
"/Count": 0
|
||||||
|
})
|
||||||
|
|
||||||
|
self.catalog = obj({
|
||||||
|
"/Pages": self.pages,
|
||||||
|
"/Type": "/Catalog"
|
||||||
|
})
|
||||||
|
self.addobj(self.catalog)
|
||||||
|
self.addobj(self.pages)
|
||||||
|
|
||||||
|
def addobj(self, obj):
|
||||||
|
newid = len(self.objects)+1
|
||||||
|
obj.identifier = newid
|
||||||
|
self.objects.append(obj)
|
||||||
|
|
||||||
|
def addimage(self, color, width, height, dpi, imgformat, imgdata):
|
||||||
if color == 'L':
|
if color == 'L':
|
||||||
color = "/DeviceGray"
|
color = "/DeviceGray"
|
||||||
elif color == 'RGB':
|
elif color == 'RGB':
|
||||||
color = "/DeviceRGB"
|
color = "/DeviceRGB"
|
||||||
elif color == '1':
|
|
||||||
# TODO: /CCITTFaxDecode monochrome images
|
|
||||||
imgdata = imgdata.convert('L')
|
|
||||||
color = "/DeviceGray"
|
|
||||||
else:
|
else:
|
||||||
error_out("unsupported color space: %s"%color)
|
error_out("unsupported color space: %s"%color)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
pdf_x, pdf_y = 72.0*width/dpi_x, 72.0*height/dpi_y # pdf units = 1/72 inch
|
pdf_x, pdf_y = 72.0*width/dpi[0], 72.0*height/dpi[1] # pdf units = 1/72 inch
|
||||||
|
|
||||||
if pdf_x < 3.00 or pdf_y < 3.00:
|
if pdf_x < 3.00 or pdf_y < 3.00:
|
||||||
warning_out("pdf width or height is below 3.00 - decrease the dpi")
|
warning_out("pdf width or height is below 3.00 - decrease the dpi")
|
||||||
|
@ -158,16 +114,11 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
# either embed the whole jpeg or deflate the bitmap representation
|
# either embed the whole jpeg or deflate the bitmap representation
|
||||||
if imgformat is "JPEG":
|
if imgformat is "JPEG":
|
||||||
ofilter = [ "/DCTDecode" ]
|
ofilter = [ "/DCTDecode" ]
|
||||||
imgdata = rawdata
|
|
||||||
elif imgformat is "JP2":
|
elif imgformat is "JP2":
|
||||||
ofilter = [ "/JPXDecode" ]
|
ofilter = [ "/JPXDecode" ]
|
||||||
imgdata = rawdata
|
self.version = 5 # jpeg2000 needs pdf 1.5
|
||||||
version = 5 # jpeg2000 needs pdf 1.5
|
|
||||||
else:
|
else:
|
||||||
ofilter = [ "/FlateDecode" ]
|
ofilter = [ "/FlateDecode" ]
|
||||||
imgdata = zlib.compress(imgdata.tostring())
|
|
||||||
im.close()
|
|
||||||
|
|
||||||
image = obj({
|
image = obj({
|
||||||
"/Type": "/XObject",
|
"/Type": "/XObject",
|
||||||
"/Subtype": "/Image",
|
"/Subtype": "/Image",
|
||||||
|
@ -187,7 +138,7 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
|
|
||||||
page = obj({
|
page = obj({
|
||||||
"/Type": "/Page",
|
"/Type": "/Page",
|
||||||
"/Parent": pages,
|
"/Parent": self.pages,
|
||||||
"/Resources": {
|
"/Resources": {
|
||||||
"/XObject": {
|
"/XObject": {
|
||||||
"/Im0": image
|
"/Im0": image
|
||||||
|
@ -196,50 +147,119 @@ def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
"/MediaBox": [0, 0, pdf_x, pdf_y],
|
"/MediaBox": [0, 0, pdf_x, pdf_y],
|
||||||
"/Contents": content
|
"/Contents": content
|
||||||
})
|
})
|
||||||
|
self.pages.content["/Kids"].append(page)
|
||||||
|
self.pages.content["/Count"] += 1
|
||||||
|
self.addobj(page)
|
||||||
|
self.addobj(content)
|
||||||
|
self.addobj(image)
|
||||||
|
|
||||||
pagestuples.append((image, content, page))
|
def tostring(self):
|
||||||
|
# add info as last object
|
||||||
|
self.addobj(self.info)
|
||||||
|
|
||||||
# complete pages object with page information
|
xreftable = list()
|
||||||
pages.content["/Kids"] = [ pagetuple[2] for pagetuple in pagestuples ]
|
|
||||||
pages.content["/Count"] = len(pagestuples)
|
|
||||||
|
|
||||||
catalog = obj({
|
result = "%%PDF-1.%d\n"%self.version
|
||||||
"/Pages": pages,
|
|
||||||
"/Type": "/Catalog"
|
|
||||||
})
|
|
||||||
|
|
||||||
objects = list()
|
xreftable.append("0000000000 65535 f \n")
|
||||||
objects.append(info.tostring(3*(len(pagestuples)+1)))
|
for o in self.objects:
|
||||||
pages.identifier = 2 # manually set it because each page references to it
|
xreftable.append("%010d 00000 n \n"%len(result))
|
||||||
for i, (image, content, page) in enumerate(reversed(pagestuples)):
|
result += o.tostring()
|
||||||
objects.append(image.tostring(3*(len(pagestuples)-i+1)-1))
|
|
||||||
objects.append(content.tostring(3*(len(pagestuples)-i+1)-2))
|
|
||||||
objects.append(page.tostring(3*(len(pagestuples)-i+1)-3))
|
|
||||||
objects.append(pages.tostring(2))
|
|
||||||
objects.append(catalog.tostring(1))
|
|
||||||
objects.reverse()
|
|
||||||
|
|
||||||
xreftable = list()
|
xrefoffset = len(result)
|
||||||
|
result += "xref\n"
|
||||||
|
result += "0 %d\n"%len(xreftable)
|
||||||
|
for x in xreftable:
|
||||||
|
result += x
|
||||||
|
result += "trailer\n"
|
||||||
|
result += parse({"/Size": len(xreftable), "/Info": self.info, "/Root": self.catalog})+"\n"
|
||||||
|
result += "startxref\n"
|
||||||
|
result += "%d\n"%xrefoffset
|
||||||
|
result += "%%EOF\n"
|
||||||
|
return result
|
||||||
|
|
||||||
result = "%%PDF-1.%d\n"%version
|
def main(images, dpi, title=None, author=None, creator=None, producer=None,
|
||||||
|
creationdate=None, moddate=None, subject=None, keywords=None,
|
||||||
|
colorspace=None, verbose=False):
|
||||||
|
|
||||||
xreftable.append("0000000000 65535 f \n")
|
def debug_out(message):
|
||||||
for o in objects:
|
if verbose:
|
||||||
xreftable.append("%010d 00000 n \n"%len(result))
|
sys.stderr.write("D: "+message+"\n")
|
||||||
result += o
|
def error_out(message):
|
||||||
|
sys.stderr.write("E: "+message+"\n")
|
||||||
|
def warning_out(message):
|
||||||
|
sys.stderr.write("W: "+message+"\n")
|
||||||
|
|
||||||
xrefoffset = len(result)
|
pdf = pdfdoc()
|
||||||
result += "xref\n"
|
|
||||||
result += "0 %d\n"%len(xreftable)
|
|
||||||
for x in xreftable:
|
|
||||||
result += x
|
|
||||||
result += "trailer\n"
|
|
||||||
result += parse({"/Size": len(xreftable), "/Info": info, "/Root": catalog})+"\n"
|
|
||||||
result += "startxref\n"
|
|
||||||
result += "%d\n"%xrefoffset
|
|
||||||
result += "%%EOF\n"
|
|
||||||
|
|
||||||
return result
|
for im in images:
|
||||||
|
rawdata = im.read()
|
||||||
|
im.seek(0)
|
||||||
|
try:
|
||||||
|
imgdata = Image.open(im)
|
||||||
|
except IOError as e:
|
||||||
|
# test if it is a jpeg2000 image
|
||||||
|
if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||||
|
error_out("cannot read input image (not jpeg2000)")
|
||||||
|
error_out("PIL: %s"%e)
|
||||||
|
exit(1)
|
||||||
|
# image is jpeg2000
|
||||||
|
width, height, ics = parsejp2(rawdata)
|
||||||
|
imgformat = "JP2"
|
||||||
|
|
||||||
|
if dpi:
|
||||||
|
dpi = dpi, dpi
|
||||||
|
debug_out("input dpi (forced) = %d x %d"%dpi)
|
||||||
|
else:
|
||||||
|
dpi = (96, 96) # TODO: read real dpi
|
||||||
|
debug_out("input dpi = %d x %d"%dpi)
|
||||||
|
|
||||||
|
if colorspace:
|
||||||
|
color = colorspace
|
||||||
|
debug_out("input colorspace (forced) = %s"%(ics))
|
||||||
|
else:
|
||||||
|
color = ics
|
||||||
|
debug_out("input colorspace = %s"%(ics))
|
||||||
|
else:
|
||||||
|
width, height = imgdata.size
|
||||||
|
imgformat = imgdata.format
|
||||||
|
|
||||||
|
if dpi:
|
||||||
|
dpi = dpi, dpi
|
||||||
|
debug_out("input dpi (forced) = %d x %d"%dpi)
|
||||||
|
else:
|
||||||
|
dpi = imgdata.info.get("dpi", (96, 96))
|
||||||
|
debug_out("input dpi = %d x %d"%dpi)
|
||||||
|
|
||||||
|
if colorspace:
|
||||||
|
color = colorspace
|
||||||
|
debug_out("input colorspace (forced) = %s"%(color))
|
||||||
|
else:
|
||||||
|
color = imgdata.mode
|
||||||
|
debug_out("input colorspace = %s"%(color))
|
||||||
|
|
||||||
|
debug_out("width x height = %d x %d"%(width,height))
|
||||||
|
debug_out("imgformat = %s"%imgformat)
|
||||||
|
|
||||||
|
# depending on the input format, determine whether to pass the raw
|
||||||
|
# image or the zlib compressed color information
|
||||||
|
if imgformat is "JPEG" or imgformat is "JP2":
|
||||||
|
if color == '1':
|
||||||
|
error_out("jpeg can't be monochrome")
|
||||||
|
exit(1)
|
||||||
|
imgdata = rawdata
|
||||||
|
else:
|
||||||
|
# because we do not support /CCITTFaxDecode
|
||||||
|
if color == '1':
|
||||||
|
imgdata = imgdata.convert('L')
|
||||||
|
color = 'L'
|
||||||
|
imgdata = zlib.compress(imgdata.tostring())
|
||||||
|
|
||||||
|
pdf.addimage(color, width, height, dpi, imgformat, imgdata)
|
||||||
|
|
||||||
|
im.close()
|
||||||
|
|
||||||
|
return pdf.tostring()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='lossless conversion/embedding of images (in)to pdf')
|
parser = argparse.ArgumentParser(description='lossless conversion/embedding of images (in)to pdf')
|
||||||
|
|
Loading…
Reference in a new issue