2014-07-26 14:12:40 +00:00
|
|
|
#!/usr/bin/env python2
|
|
|
|
|
2014-03-30 06:10:12 +00:00
|
|
|
# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
|
2013-05-02 06:17:13 +00:00
|
|
|
#
|
2014-03-30 06:10:12 +00:00
|
|
|
# This program is free software: you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU Lesser General Public
|
|
|
|
# License as published by the Free Software Foundation, either
|
|
|
|
# version 3 of the License, or (at your option) any later
|
|
|
|
# version.
|
2013-05-02 06:17:13 +00:00
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
2014-03-30 06:10:12 +00:00
|
|
|
# You should have received a copy of the GNU General Public
|
|
|
|
# License along with this program. If not, see
|
|
|
|
# <http://www.gnu.org/licenses/>.
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2015-03-07 02:20:14 +00:00
|
|
|
__version__ = "0.1.6~git"
|
2015-03-20 10:37:30 +00:00
|
|
|
default_dpi = 96.0
|
2015-03-07 02:20:14 +00:00
|
|
|
|
2015-03-20 10:37:30 +00:00
|
|
|
import re
|
2012-03-29 09:08:32 +00:00
|
|
|
import sys
|
|
|
|
import zlib
|
|
|
|
import argparse
|
2014-03-01 04:51:53 +00:00
|
|
|
from PIL import Image
|
2012-03-29 09:08:32 +00:00
|
|
|
from datetime import datetime
|
2013-10-21 13:55:47 +00:00
|
|
|
from jp2 import parsejp2
|
2015-02-16 06:39:07 +00:00
|
|
|
try:
|
|
|
|
from cStringIO import cStringIO
|
|
|
|
except ImportError:
|
|
|
|
from io import BytesIO as cStringIO
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2014-03-01 04:51:53 +00:00
|
|
|
# XXX: Switch to use logging module.
|
|
|
|
def debug_out(message, verbose=True):
    """Write a debug line to stderr.

    The line is ``"D: "`` followed by *message* and a newline. Nothing is
    written when *verbose* is falsy.
    """
    if not verbose:
        return
    line = "D: " + message + "\n"
    sys.stderr.write(line)
|
|
|
|
|
|
|
|
def error_out(message):
    """Write an error line (``"E: "`` + *message* + newline) to stderr."""
    line = "E: " + message + "\n"
    sys.stderr.write(line)
|
|
|
|
|
|
|
|
def warning_out(message):
    """Write a warning line (``"W: "`` + *message* + newline) to stderr."""
    line = "W: " + message + "\n"
    sys.stderr.write(line)
|
|
|
|
|
2012-03-29 09:08:32 +00:00
|
|
|
def parse(cont, indent=1):
    """Serialize *cont* into PDF object syntax and return it as bytes.

    Supported inputs:

    * ``dict`` -- emitted as a ``<< ... >>`` dictionary, one ``key value``
      pair per line, sorted by key and indented by four spaces per level;
    * ``int`` -- its decimal representation;
    * ``float`` -- formatted with four decimal places;
    * ``obj`` -- an indirect reference ``"<identifier> 0 R"``;
    * ``bytes`` -- returned unchanged;
    * ``list`` -- emitted as ``[ a b c ]``.

    *indent* is the nesting depth used for dictionary indentation.

    Raises ``Exception`` for a text ``str`` on Python 3 and for any other
    unsupported type.
    """
    kind = type(cont)

    if kind is dict:
        entry_pad = b" " * (4 * indent)
        closing_pad = b" " * (4 * (indent - 1))
        entries = [
            entry_pad + key + b" " + parse(value, indent + 1)
            for key, value in sorted(cont.items())
        ]
        return b"<<\n" + b"\n".join(entries) + b"\n" + closing_pad + b">>"

    if kind is int:
        return str(cont).encode()

    if kind is float:
        return ("%0.4f" % cont).encode()

    if isinstance(cont, obj):
        return ("%d 0 R" % cont.identifier).encode()

    if kind is str or kind is bytes:
        # str and bytes are the same type only on Python 2; a text str on
        # Python 3 cannot be written into the binary PDF stream
        if kind is str and kind is not bytes:
            raise Exception("parse must be passed a bytes object in py3")
        return cont

    if kind is list:
        return b"[ " + b" ".join([parse(item, indent) for item in cont]) + b" ]"

    raise Exception("cannot handle type %s" % type(cont))
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2014-03-01 03:57:40 +00:00
|
|
|
class obj(object):
    """One indirect PDF object: a content value plus an optional stream.

    The ``identifier`` attribute read by :meth:`tostring` is not set here;
    it is assigned from outside (``pdfdoc.addobj`` does so).
    """

    def __init__(self, content, stream=None):
        """Store *content* (anything ``parse`` accepts) and *stream* bytes."""
        self.content = content
        self.stream = stream

    def tostring(self):
        """Return the serialized ``N 0 obj ... endobj`` form as bytes.

        A falsy stream (``None`` or empty) yields the short form without a
        ``stream``/``endstream`` section.
        """
        header = ("%d 0 obj " % self.identifier).encode()
        body = parse(self.content)
        if not self.stream:
            return header + body + b" endobj\n"
        return (header + body +
                b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2014-03-01 03:57:40 +00:00
|
|
|
class pdfdoc(object):
    """In-memory PDF document that holds one image per page.

    Objects are collected in ``self.objects`` in insertion order and are
    numbered starting at 1. :meth:`tostring` serializes them together with
    the cross-reference table and the trailer.
    """

    def __init__(self, version=3, title=None, author=None, creator=None,
                 producer=None, creationdate=None, moddate=None, subject=None,
                 keywords=None, nodate=False):
        """Create the document skeleton (info, catalog and pages objects).

        version      -- minor PDF version; 3 produces a ``%PDF-1.3`` header
        title, author, creator, producer, subject
                     -- optional metadata values as bytes
        keywords     -- optional sequence of bytes, joined with ``,``
        creationdate, moddate
                     -- optional objects providing ``strftime`` (datetime);
                        when omitted the current time is used unless
                        *nodate* is true
        nodate       -- do not write default timestamps
        """
        self.version = version  # default pdf version 1.3
        now = datetime.now()
        self.objects = []

        info = {}
        if title:
            info[b"/Title"] = self._literal(title)
        if author:
            info[b"/Author"] = self._literal(author)
        if creator:
            info[b"/Creator"] = self._literal(creator)
        if producer:
            info[b"/Producer"] = self._literal(producer)
        if creationdate:
            info[b"/CreationDate"] = self._date(creationdate)
        elif not nodate:
            info[b"/CreationDate"] = self._date(now)
        if moddate:
            info[b"/ModDate"] = self._date(moddate)
        elif not nodate:
            info[b"/ModDate"] = self._date(now)
        if subject:
            info[b"/Subject"] = self._literal(subject)
        if keywords:
            info[b"/Keywords"] = self._literal(b",".join(keywords))

        # the info object is only registered in tostring() so that it
        # becomes the last object of the file
        self.info = obj(info)

        # create an incomplete pages object so that a /Parent entry can be
        # added to each page
        self.pages = obj({
            b"/Type": b"/Pages",
            b"/Kids": [],
            b"/Count": 0
        })

        self.catalog = obj({
            b"/Pages": self.pages,
            b"/Type": b"/Catalog"
        })
        self.addobj(self.catalog)
        self.addobj(self.pages)

    @staticmethod
    def _literal(value):
        """Return *value* (bytes) wrapped as a PDF literal string ``(...)``.

        Backslashes and parentheses are backslash-escaped so that metadata
        containing them cannot terminate the string early.
        """
        escaped = (value.replace(b"\\", b"\\\\")
                        .replace(b"(", b"\\(")
                        .replace(b")", b"\\)"))
        return b"(" + escaped + b")"

    @staticmethod
    def _date(moment):
        """Return *moment* formatted as a ``(D:YYYYMMDDHHMMSS)`` string."""
        return b"(D:" + moment.strftime("%Y%m%d%H%M%S").encode() + b")"

    def addobj(self, obj):
        """Append *obj* to the document and assign it the next identifier."""
        newid = len(self.objects)+1
        obj.identifier = newid
        self.objects.append(obj)

    def addimage(self, color, width, height, imgformat, imgdata, pdf_x, pdf_y):
        """Add a page showing one image scaled to the full page.

        color         -- 'L', 'RGB', 'CMYK' or 'CMYK;I' (inverted CMYK)
        width, height -- image size written to /Width and /Height
        imgformat     -- "JPEG" and "JPEG2000" data is embedded as is,
                         anything else is declared as /FlateDecode
        imgdata       -- bytes of the image stream
        pdf_x, pdf_y  -- page (media box) size

        Writes an error and exits with status 1 for an unsupported *color*.
        """
        if color == 'L':
            colorspace = b"/DeviceGray"
        elif color == 'RGB':
            colorspace = b"/DeviceRGB"
        elif color == 'CMYK' or color == 'CMYK;I':
            colorspace = b"/DeviceCMYK"
        else:
            error_out("unsupported color space: %s" % color)
            sys.exit(1)

        if pdf_x < 3.00 or pdf_y < 3.00:
            warning_out("pdf width or height is below 3.00 - decrease the dpi")

        # either embed the whole jpeg or deflate the bitmap representation
        # (compare by value: identity of equal strings is not guaranteed)
        if imgformat == "JPEG":
            ofilter = [b"/DCTDecode"]
        elif imgformat == "JPEG2000":
            ofilter = [b"/JPXDecode"]
            self.version = 5  # jpeg2000 needs pdf 1.5
        else:
            ofilter = [b"/FlateDecode"]

        image = obj({
            b"/Type": b"/XObject",
            b"/Subtype": b"/Image",
            b"/Filter": ofilter,
            b"/Width": width,
            b"/Height": height,
            b"/ColorSpace": colorspace,
            # hardcoded as PIL doesnt provide bits for non-jpeg formats
            b"/BitsPerComponent": 8,
            b"/Length": len(imgdata)
        }, imgdata)

        if color == 'CMYK;I':
            # Inverts all four channels
            image.content[b'/Decode'] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]

        # content stream: scale the unit square to the page and draw /Im0
        text = ("q\n%0.4f 0 0 %0.4f 0 0 cm\n/Im0 Do\nQ" % (pdf_x, pdf_y)).encode()

        content = obj({
            b"/Length": len(text)
        }, text)

        page = obj({
            b"/Type": b"/Page",
            b"/Parent": self.pages,
            b"/Resources": {
                b"/XObject": {
                    b"/Im0": image
                }
            },
            b"/MediaBox": [0, 0, pdf_x, pdf_y],
            b"/Contents": content
        })
        self.pages.content[b"/Kids"].append(page)
        self.pages.content[b"/Count"] += 1
        self.addobj(page)
        self.addobj(content)
        self.addobj(image)

    def tostring(self):
        """Return the complete PDF file as bytes.

        The info object is registered as the last object on the first call
        only, so calling this method repeatedly yields the same output.
        """
        # add info as last object (once)
        if not any(o is self.info for o in self.objects):
            self.addobj(self.info)

        header = ("%%PDF-1.%d\n" % self.version).encode()
        chunks = [header]
        offset = len(header)

        # entry 0 of the xref table is the head of the free list
        xreftable = [b"0000000000 65535 f \n"]
        for o in self.objects:
            xreftable.append(("%010d 00000 n \n" % offset).encode())
            serialized = o.tostring()
            chunks.append(serialized)
            offset += len(serialized)

        xrefoffset = offset
        chunks.append(b"xref\n")
        chunks.append(("0 %d\n" % len(xreftable)).encode())
        chunks.extend(xreftable)
        chunks.append(b"trailer\n")
        chunks.append(parse({b"/Size": len(xreftable), b"/Info": self.info,
                             b"/Root": self.catalog}) + b"\n")
        chunks.append(b"startxref\n")
        chunks.append(("%d\n" % xrefoffset).encode())
        chunks.append(b"%%EOF\n")
        # join once instead of growing a bytes object repeatedly
        return b"".join(chunks)
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2015-03-20 10:37:30 +00:00
|
|
|
def convert(images, dpi=None, pagesize=(None, None, None), title=None,
            author=None, creator=None, producer=None, creationdate=None,
            moddate=None, subject=None, keywords=None, colorspace=None,
            nodate=False, verbose=False):
    """Convert *images* into a PDF document and return it as bytes.

    images     -- iterable of file-like objects (anything with ``read()``)
                  or of paths that are opened in binary mode
    dpi        -- force this resolution for both axes
    pagesize   -- ``(width, height, options)`` tuple as produced by
                  ``valid_size``; all ``None`` means "not given"
    colorspace -- force this colorspace name instead of the detected one
    verbose    -- emit debug messages on stderr

    The remaining arguments are metadata passed on to ``pdfdoc``.

    JPEG and JPEG2000 input is embedded unchanged; every other format is
    decoded with PIL and zlib-compressed. Unreadable input or a monochrome
    JPEG writes an error to stderr and exits with status 1.
    """
    pagesize_options = pagesize[2]

    pdf = pdfdoc(3, title, author, creator, producer, creationdate,
                 moddate, subject, keywords, nodate)

    for imfilename in images:
        debug_out("Reading %s" % imfilename, verbose)
        try:
            rawdata = imfilename.read()
        except AttributeError:
            # no read() method: treat the item as a path
            with open(imfilename, "rb") as infile:
                rawdata = infile.read()
        im = cStringIO(rawdata)

        try:
            imgdata = Image.open(im)
        except IOError as e:
            # test if it is a jpeg2000 image
            # (bytes literal so that the comparison also works on python3)
            if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
                error_out("cannot read input image (not jpeg2000)")
                error_out("PIL: %s" % e)
                sys.exit(1)
            # image is jpeg2000
            width, height, ics = parsejp2(rawdata)
            imgformat = "JPEG2000"

            # TODO: read real dpi from input jpeg2000 image
            ndpi = (default_dpi, default_dpi)
            debug_out("input dpi = %d x %d" % ndpi, verbose)

            if colorspace:
                color = colorspace
                debug_out("input colorspace (forced) = %s" % (ics), verbose)
            else:
                color = ics
                debug_out("input colorspace = %s" % (ics), verbose)
        else:
            width, height = imgdata.size
            imgformat = imgdata.format

            ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
            # in python3, the returned dpi value for some tiff images will
            # not be an integer but a float. To make the behaviour of
            # img2pdf the same between python2 and python3, we convert that
            # float into an integer by rounding
            # search online for the 72.009 dpi problem for more info
            ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
            if ndpi[0] <= 0 or ndpi[1] <= 0:
                # a stored resolution of zero would divide by zero below
                ndpi = (default_dpi, default_dpi)
            debug_out("input dpi = %d x %d" % ndpi, verbose)

            if colorspace:
                color = colorspace
                debug_out("input colorspace (forced) = %s" % (color), verbose)
            else:
                color = imgdata.mode
                if color == "CMYK" and imgformat == "JPEG":
                    # Adobe inverts CMYK JPEGs for some reason, and others
                    # have followed suit as well. Some software assumes the
                    # JPEG is inverted if the Adobe tag (APP14), while other
                    # software assumes all CMYK JPEGs are inverted. I don't
                    # have enough experience with these to know which is
                    # better for images currently in the wild, so I'm going
                    # with the first approach for now.
                    if "adobe" in imgdata.info:
                        color = "CMYK;I"
                debug_out("input colorspace = %s" % (color), verbose)

        debug_out("width x height = %d x %d" % (width, height), verbose)
        debug_out("imgformat = %s" % imgformat, verbose)

        if dpi:
            ndpi = dpi, dpi
            debug_out("input dpi (forced) = %d x %d" % ndpi, verbose)
        elif pagesize_options:
            ndpi = get_ndpi(width, height, pagesize)
            debug_out("calculated dpi (based on pagesize) = %d x %d" % ndpi, verbose)

        # depending on the input format, determine whether to pass the raw
        # image or the zlib compressed color information
        if imgformat == "JPEG" or imgformat == "JPEG2000":
            if color == '1':
                error_out("jpeg can't be monochrome")
                sys.exit(1)
            imgdata = rawdata
        else:
            # because we do not support /CCITTFaxDecode
            if color == '1':
                debug_out("Converting colorspace 1 to L", verbose)
                imgdata = imgdata.convert('L')
                color = 'L'
            elif color in ("RGB", "L", "CMYK", "CMYK;I"):
                debug_out("Colorspace is OK: %s" % color, verbose)
            else:
                debug_out("Converting colorspace %s to RGB" % color, verbose)
                imgdata = imgdata.convert('RGB')
                color = imgdata.mode
            img = imgdata.tobytes()
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the close() method
            try:
                imgdata.close()
            except AttributeError:
                pass
            imgdata = zlib.compress(img)
        im.close()

        if pagesize_options and pagesize_options['exact'][1]:
            # output size exactly to specified dimensions
            # pagesize[0], pagesize[1] already checked in valid_size()
            pdf_x, pdf_y = pagesize[0], pagesize[1]
        else:
            # output size based on dpi; point = 1/72 inch
            pdf_x, pdf_y = 72.0*width/float(ndpi[0]), 72.0*height/float(ndpi[1])

        pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)

    return pdf.tostring()
|
2012-03-29 09:08:32 +00:00
|
|
|
|
2015-03-20 10:37:30 +00:00
|
|
|
def get_ndpi(width, height, pagesize):
    """Return the ``(dpi_x, dpi_y)`` pair that scales an image to a page.

    width, height -- image size in pixels
    pagesize      -- ``(page_width, page_height, options)`` with the page
                     size in points (either may be ``None``) and *options*
                     either ``None`` or the option dict from ``valid_size``

    With the ``fill`` option the image covers the whole page (the axis that
    needs the larger magnification wins); with both dimensions and no
    option it fits into the page; with one dimension that dimension is
    matched; with none the module default dpi is used. Both returned values
    are always equal.
    """
    pagesize_options = pagesize[2]
    page_w, page_h = pagesize[0], pagesize[1]

    if pagesize_options and pagesize_options['fill'][1]:
        # float() keeps the aspect ratios exact on python2 where int/int
        # would truncate
        if float(width)/height < float(page_w)/page_h:
            tmp_dpi = 72.0*width/page_w
        else:
            tmp_dpi = 72.0*height/page_h
    elif page_w and page_h:
        # if both height and width given with no specific pagesize_option,
        # resize to fit "into" page
        if float(width)/height < float(page_w)/page_h:
            tmp_dpi = 72.0*height/page_h
        else:
            tmp_dpi = 72.0*width/page_w
    elif page_w:
        # if width given, calculate dpi based on width
        tmp_dpi = 72.0*width/page_w
    elif page_h:
        # if height given, calculate dpi based on height
        tmp_dpi = 72.0*height/page_h
    else:
        tmp_dpi = default_dpi

    return tmp_dpi, tmp_dpi
|
2014-03-01 03:57:40 +00:00
|
|
|
|
|
|
|
def positive_float(string):
    """argparse type: convert *string* to a float greater than zero.

    Raises ``argparse.ArgumentTypeError`` when the value is zero, negative
    or NaN. A string that is not a number raises ``ValueError`` from
    ``float`` itself.
    """
    value = float(string)
    # "not value > 0" rather than "value <= 0": every comparison with NaN
    # is false, so this form rejects NaN as well
    if not value > 0:
        msg = "%r is not positive" % string
        raise argparse.ArgumentTypeError(msg)
    return value
|
|
|
|
|
|
|
|
def valid_date(string):
    """argparse type: parse ``YYYY-MM-DDTHH:MM:SS`` into a ``datetime``.

    ``datetime.strptime`` raises ``ValueError`` for any other format.
    """
    date_format = "%Y-%m-%dT%H:%M:%S"
    parsed = datetime.strptime(string, date_format)
    return parsed
|
|
|
|
|
2015-03-20 10:37:30 +00:00
|
|
|
def get_standard_papersize(string):
    """Translate a paper size name into a size specification string.

    The lookup is case-insensitive. A known name maps to a square
    ``"<short side>x<short side>^"`` specification (the trailing comment on
    each entry shows the full sheet size in points). Any other input is
    returned lower-cased.
    """
    known_sizes = {
        "11x17": "792x792^",        # "792x1224",
        "ledger": "792x792^",       # "1224x792",
        "legal": "612x612^",        # "612x1008",
        "letter": "612x612^",       # "612x792",
        "arche": "2592x2592^",      # "2592x3456",
        "archd": "1728x1728^",      # "1728x2592",
        "archc": "1296x1296^",      # "1296x1728",
        "archb": "864x864^",        # "864x1296",
        "archa": "648x648^",        # "648x864",
        "a0": "2380x2380^",         # "2380x3368",
        "a1": "1684x1684^",         # "1684x2380",
        "a2": "1190x1190^",         # "1190x1684",
        "a3": "842x842^",           # "842x1190",
        "a4": "595x595^",           # "595x842",
        "a5": "421x421^",           # "421x595",
        "a6": "297x297^",           # "297x421",
        "a7": "210x210^",           # "210x297",
        "a8": "148x148^",           # "148x210",
        "a9": "105x105^",           # "105x148",
        "a10": "74x74^",            # "74x105",
        "b0": "2836x2836^",         # "2836x4008",
        "b1": "2004x2004^",         # "2004x2836",
        "b2": "1418x1418^",         # "1418x2004",
        "b3": "1002x1002^",         # "1002x1418",
        "b4": "709x709^",           # "709x1002",
        "b5": "501x501^",           # "501x709",
        "c0": "2600x2600^",         # "2600x3677",
        "c1": "1837x1837^",         # "1837x2600",
        "c2": "1298x1298^",         # "1298x1837",
        "c3": "918x918^",           # "918x1298",
        "c4": "649x649^",           # "649x918",
        "c5": "459x459^",           # "459x649",
        "c6": "323x323^",           # "323x459",
        "flsa": "612x612^",         # "612x936",
        "flse": "612x612^",         # "612x936",
        "halfletter": "396x396^",   # "396x612",
        "tabloid": "792x792^",      # "792x1224",
        "statement": "396x396^",    # "396x612",
        "executive": "540x540^",    # "540x720",
        "folio": "612x612^",        # "612x936",
        "quarto": "610x610^",       # "610x780"
    }

    name = string.lower()
    try:
        return known_sizes[name]
    except KeyError:
        return name
|
|
|
|
|
2015-03-06 18:29:24 +00:00
|
|
|
def valid_size(string):
    """argparse type: parse a page size specification.

    *string* is either a paper size name known to
    ``get_standard_papersize`` or has the form ``AuxBv#`` where A is the
    width, B the height, u and v are units (in, cm, mm, pt; default pt)
    and # are option characters.

    Returns ``(x, y, pagesize_options)``: x and y are sizes in points, or
    ``None`` when that dimension was omitted; ``pagesize_options`` maps an
    option name to a ``[pattern, flag]`` list whose flag tells whether the
    option character was present.

    Raises ``argparse.ArgumentTypeError`` for malformed input, an unknown
    unit, an ``exact`` request without both dimensions, or when neither
    dimension is usable.
    """
    # conversion factors from units to points
    units = {
        'in': 72.0,
        'cm': 72.0/2.54,
        'mm': 72.0/25.4,
        'pt': 1.0
    }

    # option name -> [regular expression matching its character, found?]
    pagesize_options = {
        'exact': [r'\!', False],
        'shrink': [r'\>', False],
        'enlarge': [r'\<', False],
        'fill': [r'\^', False],
        'percent': [r'\%', False],
        'count': [r'\@', False],
    }

    string = get_standard_papersize(string)

    pattern = re.compile(r"""
            ([0-9]*\.?[0-9]*)   # tokens.group(1) == width; may be empty
            ([a-z]*)            # tokens.group(2) == units; may be empty
            x
            ([0-9]*\.?[0-9]*)   # tokens.group(3) == height; may be empty
            ([a-zA-Z]*)         # tokens.group(4) == units; may be empty
            ([^0-9a-zA-Z]*)     # tokens.group(5) == extra options
            """, re.VERBOSE)

    tokens = pattern.match(string)

    # the pattern must match, and tokens.group(0) must cover the entire
    # input string (match() returns None when there is no "x" separator)
    if tokens is None or tokens.group(0) != string:
        msg = ('Input size needs to be of the format AuxBv#, '
               'where A is width, B is height, u and v are units, '
               '# are options.  '
               'You may omit either width or height, but not both.  '
               'Units may be specified as (in, cm, mm, pt).  '
               'You may omit units, which will default to pt.  '
               'Available options include (! = exact ; ^ = fill ; default = into).')
        raise argparse.ArgumentTypeError(msg)

    # temporary lists [size in points, number text, unit text] to loop
    # through to process width and height
    pagesize_size = {
        'x': [0, tokens.group(1), tokens.group(2)],
        'y': [0, tokens.group(3), tokens.group(4)]
    }

    for value in pagesize_size.values():
        try:
            value[0] = float(value[1])
            value[0] *= units[value[2]]     # convert to points
        except ValueError:
            # assign None if width or height not provided
            value[0] = None
        except KeyError:
            # if units unrecognized, raise error
            # otherwise default to pt because units not provided
            if value[2]:
                msg = "unrecognized unit '%s'." % value[2]
                raise argparse.ArgumentTypeError(msg)

    x = pagesize_size['x'][0]
    y = pagesize_size['y'][0]

    # parse options for resize methods
    if tokens.group(5):
        for value in pagesize_options.values():
            if re.search(value[0], tokens.group(5)):
                value[1] = True

    if pagesize_options['fill'][1]:
        # if either width or height is not given, try to fill in missing value
        if not x:
            x = y
        elif not y:
            y = x

    if pagesize_options['exact'][1]:
        if not x or not y:
            msg = ('exact size requires both width and height.')
            raise argparse.ArgumentTypeError(msg)

    if not x and not y:
        msg = ('width and height cannot both be omitted.')
        raise argparse.ArgumentTypeError(msg)

    return (x, y, pagesize_options)
|
2015-03-06 18:29:24 +00:00
|
|
|
|
2015-03-13 10:43:38 +00:00
|
|
|
# in python3, the received argument will be a unicode str() object which needs
|
|
|
|
# to be encoded into a bytes() object
|
|
|
|
# in python2, the received argument will be a binary str() object which needs
|
|
|
|
# no encoding
|
|
|
|
# we check whether we use python2 or python3 by checking whether the argument
|
|
|
|
# is both, type str and type bytes (only the case in python2)
|
|
|
|
def pdf_embedded_string(string):
    """argparse type: return *string* as a byte string.

    A text ``str`` (python3, where ``str`` is not ``bytes``) is encoded as
    UTF-8; a value that already is a byte string is returned unchanged.
    """
    is_text = type(string) is str and type(string) is not bytes
    return string.encode("utf8") if is_text else string
|
|
|
|
|
2014-03-01 03:57:40 +00:00
|
|
|
# Command line interface. The parser is built at import time so that main()
# and other importers can share it.
parser = argparse.ArgumentParser(
    description='Lossless conversion/embedding of images (in)to pdf')
parser.add_argument(
    'images', metavar='infile', type=str,
    nargs='+', help='input file(s)')
parser.add_argument(
    '-o', '--output', metavar='out', type=argparse.FileType('wb'),
    # python3 exposes the binary stdout as sys.stdout.buffer
    default=getattr(sys.stdout, "buffer", sys.stdout),
    help='output file (default: stdout)')

# --dpi and --pagesize both determine the page dimensions, so only one of
# them may be given
sizeopts = parser.add_mutually_exclusive_group()
sizeopts.add_argument(
    '-d', '--dpi', metavar='dpi', type=positive_float,
    help=('dpi for pdf output. '
          'If input image does not specify dpi the default is %.2f. '
          'Must not be used with -s/--pagesize.') % default_dpi
)

sizeopts.add_argument(
    '-s', '--pagesize', metavar='size', type=valid_size,
    default=(None, None, None),
    help=('size of the pdf pages in format AuxBv#, '
          'where A is width, B is height, u and v are units, # are options. '
          'You may omit either width or height, but not both. '
          'Some common page sizes, such as letter and a4, are also recognized. '
          'Units may be specified as (in, cm, mm, pt). '
          'Units default to pt when absent. '
          'Available options include (! = exact ; ^ = fill ; default = into). '
          'Must not be used with -d/--dpi.')
)

# metadata values end up inside the binary pdf and are therefore converted
# to byte strings
parser.add_argument(
    '-t', '--title', metavar='title', type=pdf_embedded_string,
    help='title for metadata')
parser.add_argument(
    '-a', '--author', metavar='author', type=pdf_embedded_string,
    help='author for metadata')
parser.add_argument(
    '-c', '--creator', metavar='creator', type=pdf_embedded_string,
    help='creator for metadata')
parser.add_argument(
    '-p', '--producer', metavar='producer', type=pdf_embedded_string,
    help='producer for metadata')
parser.add_argument(
    '-r', '--creationdate', metavar='creationdate', type=valid_date,
    help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format')
parser.add_argument(
    '-m', '--moddate', metavar='moddate', type=valid_date,
    help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format')
parser.add_argument(
    '-S', '--subject', metavar='subject', type=pdf_embedded_string,
    help='subject for metadata')
parser.add_argument(
    '-k', '--keywords', metavar='kw', type=pdf_embedded_string, nargs='+',
    help='keywords for metadata')
parser.add_argument(
    # the colorspace is compared with text literals such as 'RGB' and is
    # never written to the pdf verbatim, so it has to stay a text string;
    # converting it to bytes makes every comparison fail on python3
    '-C', '--colorspace', metavar='colorspace', type=str,
    help='force PIL colorspace (one of: RGB, L, 1, CMYK, CMYK;I)')
parser.add_argument(
    '-D', '--nodate', help='do not add timestamps', action="store_true")
parser.add_argument(
    '-v', '--verbose', help='verbose mode', action="store_true")
parser.add_argument(
    '-V', '--version', action='version', version='%(prog)s '+__version__,
    help="Print version information and exit")
|
2014-03-01 03:57:40 +00:00
|
|
|
|
|
|
|
def main(args=None):
    """Command line entry point.

    Parses *args* (``sys.argv[1:]`` when ``None``) with the module-level
    parser, converts the given images and writes the resulting PDF to the
    selected output.
    """
    argv = sys.argv[1:] if args is None else args
    args = parser.parse_args(argv)

    write = args.output.write
    pdf_bytes = convert(
        args.images, args.dpi, args.pagesize, args.title, args.author,
        args.creator, args.producer, args.creationdate, args.moddate,
        args.subject, args.keywords, args.colorspace, args.nodate,
        args.verbose)
    write(pdf_bytes)


# run the converter only when executed as a script, not when imported
if __name__ == '__main__':
    main()
|