Changes to pdf page size handling
Changes to `valid_size()`:
* accept common page sizes, such as letter and a4.
* parse dimensions of format: AuxBv#, where A is width, u is units, B is height, v is units, # are options.
* accept units: in, cm, mm, pt

Changes to `convert()`:
* resize pages based on dpi calculations
* by default, resize images to fit into the page size (like the default resize in imagemagick)
* implement exact resizing (ignore dpi; equiv to ! in imagemagick)

Created `get_ndpi()`:
* provides dpi for page resizing
* implement fill resizing (equiv to ^ in imagemagick)

Other changes:
* default dpi in global variable
This commit is contained in:
parent
36fb9173fe
commit
592cdc1cdb
1 changed files with 208 additions and 50 deletions
258
src/img2pdf.py
258
src/img2pdf.py
|
@ -18,7 +18,9 @@
|
||||||
# <http://www.gnu.org/licenses/>.
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
__version__ = "0.1.6~git"
|
__version__ = "0.1.6~git"
|
||||||
|
default_dpi = 96.0
|
||||||
|
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import zlib
|
import zlib
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -212,10 +214,12 @@ class pdfdoc(object):
|
||||||
result += b"%%EOF\n"
|
result += b"%%EOF\n"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def convert(images, dpi=None, pagesize=(None, None), title=None, author=None,
|
def convert(images, dpi=None, pagesize=(None, None, None), title=None,
|
||||||
creator=None, producer=None, creationdate=None, moddate=None,
|
author=None, creator=None, producer=None, creationdate=None,
|
||||||
subject=None, keywords=None, colorspace=None, nodate=False,
|
moddate=None, subject=None, keywords=None, colorspace=None,
|
||||||
verbose=False):
|
nodate=False, verbose=False):
|
||||||
|
|
||||||
|
pagesize_options = pagesize[2]
|
||||||
|
|
||||||
pdf = pdfdoc(3, title, author, creator, producer, creationdate,
|
pdf = pdfdoc(3, title, author, creator, producer, creationdate,
|
||||||
moddate, subject, keywords, nodate)
|
moddate, subject, keywords, nodate)
|
||||||
|
@ -241,13 +245,9 @@ def convert(images, dpi=None, pagesize=(None, None), title=None, author=None,
|
||||||
width, height, ics = parsejp2(rawdata)
|
width, height, ics = parsejp2(rawdata)
|
||||||
imgformat = "JPEG2000"
|
imgformat = "JPEG2000"
|
||||||
|
|
||||||
if dpi:
|
# TODO: read real dpi from input jpeg2000 image
|
||||||
ndpi = dpi, dpi
|
ndpi = (default_dpi, default_dpi)
|
||||||
debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
|
debug_out("input dpi = %d x %d" % ndpi, verbose)
|
||||||
else:
|
|
||||||
# TODO: read real dpi from input jpeg2000 image
|
|
||||||
ndpi = (96, 96)
|
|
||||||
debug_out("input dpi = %d x %d"%ndpi, verbose)
|
|
||||||
|
|
||||||
if colorspace:
|
if colorspace:
|
||||||
color = colorspace
|
color = colorspace
|
||||||
|
@ -259,18 +259,14 @@ def convert(images, dpi=None, pagesize=(None, None), title=None, author=None,
|
||||||
width, height = imgdata.size
|
width, height = imgdata.size
|
||||||
imgformat = imgdata.format
|
imgformat = imgdata.format
|
||||||
|
|
||||||
if dpi:
|
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
|
||||||
ndpi = dpi, dpi
|
# in python3, the returned dpi value for some tiff images will
|
||||||
debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
|
# not be an integer but a float. To make the behaviour of
|
||||||
else:
|
# img2pdf the same between python2 and python3, we convert that
|
||||||
ndpi = imgdata.info.get("dpi", (96, 96))
|
# float into an integer by rounding
|
||||||
# in python3, the returned dpi value for some tiff images will
|
# search online for the 72.009 dpi problem for more info
|
||||||
# not be an integer but a float. To make the behaviour of
|
ndpi = (int(round(ndpi[0])),int(round(ndpi[1])))
|
||||||
# img2pdf the same between python2 and python3, we convert that
|
debug_out("input dpi = %d x %d" % ndpi, verbose)
|
||||||
# float into an integer by rounding
|
|
||||||
# search online for the 72.009 dpi problem for more info
|
|
||||||
ndpi = (int(round(ndpi[0])),int(round(ndpi[1])))
|
|
||||||
debug_out("input dpi = %d x %d"%ndpi, verbose)
|
|
||||||
|
|
||||||
if colorspace:
|
if colorspace:
|
||||||
color = colorspace
|
color = colorspace
|
||||||
|
@ -292,6 +288,13 @@ def convert(images, dpi=None, pagesize=(None, None), title=None, author=None,
|
||||||
debug_out("width x height = %d x %d"%(width,height), verbose)
|
debug_out("width x height = %d x %d"%(width,height), verbose)
|
||||||
debug_out("imgformat = %s"%imgformat, verbose)
|
debug_out("imgformat = %s"%imgformat, verbose)
|
||||||
|
|
||||||
|
if dpi:
|
||||||
|
ndpi = dpi, dpi
|
||||||
|
debug_out("input dpi (forced) = %d x %d" % ndpi, verbose)
|
||||||
|
elif pagesize_options:
|
||||||
|
ndpi = get_ndpi(width, height, pagesize)
|
||||||
|
debug_out("calculated dpi (based on pagesize) = %d x %d" % ndpi, verbose)
|
||||||
|
|
||||||
# depending on the input format, determine whether to pass the raw
|
# depending on the input format, determine whether to pass the raw
|
||||||
# image or the zlib compressed color information
|
# image or the zlib compressed color information
|
||||||
if imgformat is "JPEG" or imgformat is "JPEG2000":
|
if imgformat is "JPEG" or imgformat is "JPEG2000":
|
||||||
|
@ -320,21 +323,43 @@ def convert(images, dpi=None, pagesize=(None, None), title=None, author=None,
|
||||||
imgdata = zlib.compress(img)
|
imgdata = zlib.compress(img)
|
||||||
im.close()
|
im.close()
|
||||||
|
|
||||||
# pdf units = 1/72 inch
|
if pagesize_options and pagesize_options['exact'][1]:
|
||||||
if not pagesize[0] and not pagesize[1]:
|
# output size exactly to specified dimensions
|
||||||
pdf_x, pdf_y = 72.0*width/float(ndpi[0]), 72.0*height/float(ndpi[1])
|
# pagesize[0], pagesize[1] already checked in valid_size()
|
||||||
elif not pagesize[1]:
|
pdf_x, pdf_y = pagesize[0], pagesize[1]
|
||||||
pdf_x, pdf_y = pagesize[0], pagesize[0]*height/float(width)
|
|
||||||
elif not pagesize[0]:
|
|
||||||
pdf_x, pdf_y = pagesize[1]*width/float(height), pagesize[1]
|
|
||||||
else:
|
else:
|
||||||
pdf_x = pagesize[0]
|
# output size based on dpi; point = 1/72 inch
|
||||||
pdf_y = pagesize[1]
|
pdf_x, pdf_y = 72.0*width/float(ndpi[0]), 72.0*height/float(ndpi[1])
|
||||||
|
|
||||||
pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)
|
pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)
|
||||||
|
|
||||||
return pdf.tostring()
|
return pdf.tostring()
|
||||||
|
|
||||||
|
def get_ndpi(width, height, pagesize):
    """Return the ``(x_dpi, y_dpi)`` pair that scales an image onto a page.

    The same value is returned for both axes, so the image keeps its
    aspect ratio.  One PDF point is 1/72 inch, hence
    ``dpi = 72 * pixels / points``.

    width, height -- image dimensions in pixels.
    pagesize      -- ``(page_width, page_height, options)``.  The page
                     dimensions are in points and either may be ``None``;
                     ``options`` is the option dictionary built by
                     ``valid_size()`` or ``None``.

    Behaviour:
    * both dimensions given and the 'fill' option set: the image covers
      the page (the tighter axis matches, the other one overflows).
    * both dimensions given otherwise: the image fits "into" the page.
    * only one dimension given: that dimension alone decides the dpi.
    * no dimension given: the module-wide ``default_dpi`` is used.
    """
    page_w, page_h, pagesize_options = pagesize[0], pagesize[1], pagesize[2]
    fill = bool(pagesize_options and pagesize_options['fill'][1])

    if page_w and page_h:
        # Compare aspect ratios by cross-multiplication instead of
        # width/height < page_w/page_h: with integer pixel sizes the
        # division truncates under python2 and can select the wrong axis,
        # and a zero height would raise ZeroDivisionError.
        image_is_narrower = width * page_h < page_w * height
        if fill:
            # cover the page: a narrower image is scaled to the page width
            scale_by_width = image_is_narrower
        else:
            # fit into the page: a narrower image is scaled to the page height
            scale_by_width = not image_is_narrower
    elif page_w:
        # only the width is known, so it alone determines the dpi
        scale_by_width = True
    elif page_h:
        # only the height is known, so it alone determines the dpi
        scale_by_width = False
    else:
        return default_dpi, default_dpi

    if scale_by_width:
        tmp_dpi = 72.0 * width / page_w
    else:
        tmp_dpi = 72.0 * height / page_h

    return tmp_dpi, tmp_dpi
|
||||||
|
|
||||||
def positive_float(string):
|
def positive_float(string):
|
||||||
value = float(string)
|
value = float(string)
|
||||||
|
@ -346,22 +371,142 @@ def positive_float(string):
|
||||||
def valid_date(string):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` timestamp into a ``datetime``.

    Any exception raised by ``datetime.strptime`` for text that does not
    match the format is passed on to the caller unchanged.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    parsed = datetime.strptime(string, timestamp_format)
    return parsed
|
||||||
|
|
||||||
|
def get_standard_papersize(string):
    """Translate a paper-size name into a geometry string.

    The lookup is case-insensitive.  A recognised name yields its geometry
    from the table below; anything else is returned lower-cased and
    otherwise untouched, so that the caller can parse it as an explicit
    geometry.

    Every table entry is a square geometry carrying the fill option ('^').
    The trailing comment on each entry looks like the full WxH geometry in
    points that the name stands for (NOTE(review): confirm against the
    paper-size standards).
    """
    known_sizes = {
        "11x17": "792x792^",        # "792x1224",
        "ledger": "792x792^",       # "1224x792",
        "legal": "612x612^",        # "612x1008",
        "letter": "612x612^",       # "612x792",
        "arche": "2592x2592^",      # "2592x3456",
        "archd": "1728x1728^",      # "1728x2592",
        "archc": "1296x1296^",      # "1296x1728",
        "archb": "864x864^",        # "864x1296",
        "archa": "648x648^",        # "648x864",
        "a0": "2380x2380^",         # "2380x3368",
        "a1": "1684x1684^",         # "1684x2380",
        "a2": "1190x1190^",         # "1190x1684",
        "a3": "842x842^",           # "842x1190",
        "a4": "595x595^",           # "595x842",
        "a5": "421x421^",           # "421x595",
        "a6": "297x297^",           # "297x421",
        "a7": "210x210^",           # "210x297",
        "a8": "148x148^",           # "148x210",
        "a9": "105x105^",           # "105x148",
        "a10": "74x74^",            # "74x105",
        "b0": "2836x2836^",         # "2836x4008",
        "b1": "2004x2004^",         # "2004x2836",
        "b2": "1418x1418^",         # "1418x2004",
        "b3": "1002x1002^",         # "1002x1418",
        "b4": "709x709^",           # "709x1002",
        "b5": "501x501^",           # "501x709",
        "c0": "2600x2600^",         # "2600x3677",
        "c1": "1837x1837^",         # "1837x2600",
        "c2": "1298x1298^",         # "1298x1837",
        "c3": "918x918^",           # "918x1298",
        "c4": "649x649^",           # "649x918",
        "c5": "459x459^",           # "459x649",
        "c6": "323x323^",           # "323x459",
        "flsa": "612x612^",         # "612x936",
        "flse": "612x612^",         # "612x936",
        "halfletter": "396x396^",   # "396x612",
        "tabloid": "792x792^",      # "792x1224",
        "statement": "396x396^",    # "396x612",
        "executive": "540x540^",    # "540x720",
        "folio": "612x612^",        # "612x936",
        "quarto": "610x610^",       # "610x780"
    }

    name = string.lower()
    if name in known_sizes:
        return known_sizes[name]
    return name
|
||||||
|
|
||||||
def valid_size(string):
    """Parse a page-size argument into ``(width, height, options)``.

    The accepted format is ``AuxBv#``: A is the width, B the height, u and
    v their units (in, cm, mm or pt; pt when absent) and # a run of option
    characters.  Paper-size names known to ``get_standard_papersize()``
    are translated to a geometry first.

    Returns a tuple ``(x, y, pagesize_options)`` where x and y are floats
    in points, or ``None`` for a dimension that was not given, and
    pagesize_options maps each option name to a two-element list
    ``[regex for its flag character, whether the flag was present]``.

    Raises argparse.ArgumentTypeError when the text does not follow the
    format, names an unknown unit, requests exact sizing without both
    dimensions, or gives neither dimension.
    """
    # conversion factors from units to points
    units = {
        'in': 72.0,
        'cm': 72.0/2.54,
        'mm': 72.0/25.4,
        'pt': 1.0,
    }

    # option name -> [regex matching its flag character, flag present?]
    # raw strings keep the backslashes without invalid-escape warnings
    pagesize_options = {
        'exact': [r'\!', False],
        'shrink': [r'\>', False],
        'enlarge': [r'\<', False],
        'fill': [r'\^', False],
        'percent': [r'\%', False],
        'count': [r'\@', False],
    }

    string = get_standard_papersize(string)

    pattern = re.compile(r"""
            ([0-9]*\.?[0-9]*)   # tokens.group(1) == width; may be empty
            ([a-z]*)            # tokens.group(2) == units; may be empty
            x
            ([0-9]*\.?[0-9]*)   # tokens.group(3) == height; may be empty
            ([a-zA-Z]*)         # tokens.group(4) == units; may be empty
            ([^0-9a-zA-Z]*)     # tokens.group(5) == extra options
        """, re.VERBOSE)

    tokens = pattern.match(string)

    # match() returns None when the text has no 'x' separator in a valid
    # position; otherwise tokens.group(0) must cover the entire input.
    if tokens is None or tokens.group(0) != string:
        msg = ('Input size needs to be of the format AuxBv#, '
               'where A is width, B is height, u and v are units, '
               '# are options.  '
               'You may omit either width or height, but not both.  '
               'Units may be specified as (in, cm, mm, pt).  '
               'You may omit units, which will default to pt.  '
               'Available options include (! = exact ; ^ = fill ; default = into).')
        raise argparse.ArgumentTypeError(msg)

    # [size in points, number as text, unit as text] for each dimension
    pagesize_size = {
        'x': [0, tokens.group(1), tokens.group(2)],
        'y': [0, tokens.group(3), tokens.group(4)],
    }

    for value in pagesize_size.values():
        try:
            value[0] = float(value[1])
            value[0] *= units[value[2]]     # convert to points
        except ValueError:
            # assign None if width or height not provided
            value[0] = None
        except KeyError:
            # if units unrecognized, raise error
            # otherwise default to pt because units not provided
            if value[2]:
                msg = "unrecognized unit '%s'." % value[2]
                raise argparse.ArgumentTypeError(msg)

    x = pagesize_size['x'][0]
    y = pagesize_size['y'][0]

    # parse options for resize methods
    if tokens.group(5):
        for value in pagesize_options.values():
            if re.search(value[0], tokens.group(5)):
                value[1] = True

    if pagesize_options['fill'][1]:
        # if either width or height is not given, try to fill in missing value
        if not x:
            x = y
        elif not y:
            y = x

    if pagesize_options['exact'][1]:
        if not x or not y:
            msg = ('exact size requires both width and height.')
            raise argparse.ArgumentTypeError(msg)

    if not x and not y:
        msg = ('width and height cannot both be omitted.')
        raise argparse.ArgumentTypeError(msg)

    return (x, y, pagesize_options)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Lossless conversion/embedding of images (in)to pdf')
|
description='Lossless conversion/embedding of images (in)to pdf')
|
||||||
|
@ -370,16 +515,29 @@ parser.add_argument(
|
||||||
nargs='+', help='input file(s)')
|
nargs='+', help='input file(s)')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-o', '--output', metavar='out', type=argparse.FileType('wb'),
|
'-o', '--output', metavar='out', type=argparse.FileType('wb'),
|
||||||
default=getattr(sys.stdout, "buffer", sys.stdout), help='output file (default: stdout)')
|
default=getattr(sys.stdout, "buffer", sys.stdout),
|
||||||
|
help='output file (default: stdout)')
|
||||||
|
|
||||||
sizeopts = parser.add_mutually_exclusive_group()
|
sizeopts = parser.add_mutually_exclusive_group()
|
||||||
sizeopts.add_argument(
|
sizeopts.add_argument(
|
||||||
'-d', '--dpi', metavar='dpi', type=positive_float,
|
'-d', '--dpi', metavar='dpi', type=positive_float,
|
||||||
help='dpi for pdf output. If input image does not specify dpi the default is 96.0. Must not be specified together with -s/--pagesize.')
|
help=('dpi for pdf output. '
|
||||||
|
'If input image does not specify dpi the default is %.2f. '
|
||||||
|
'Must not be used with -s/--pagesize.') % default_dpi
|
||||||
|
)
|
||||||
|
|
||||||
sizeopts.add_argument(
|
sizeopts.add_argument(
|
||||||
'-s', '--pagesize', metavar='size', type=valid_size,
|
'-s', '--pagesize', metavar='size', type=valid_size,
|
||||||
default=(None, None),
|
default=(None, None, None),
|
||||||
help='size of the pages in the pdf output in format AxB with A and B being width and height of the page in points. You can omit either one of them. Must not be specified together with -d/--dpi.')
|
help=('size of the pdf pages in format AuxBv#, '
|
||||||
|
'where A is width, B is height, u and v are units, # are options. '
|
||||||
|
'You may omit either width or height, but not both. '
|
||||||
|
'Some common page sizes, such as letter and a4, are also recognized. '
|
||||||
|
'Units may be specified as (in, cm, mm, pt). '
|
||||||
|
'Units default to pt when absent. '
|
||||||
|
'Available options include (! = exact ; ^ = fill ; default = into). '
|
||||||
|
'Must not be used with -d/--dpi.')
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-t', '--title', metavar='title', type=str,
|
'-t', '--title', metavar='title', type=str,
|
||||||
|
|
Loading…
Reference in a new issue