img2pdf/src/img2pdf.py

#!/usr/bin/env python2

# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

import sys
import zlib
import argparse
import struct
from PIL import Image
from datetime import datetime
from jp2 import parsejp2

# XXX: Switch to use logging module.
def debug_out(message, verbose=True):
    if verbose:
        sys.stderr.write("D: "+message+"\n")

def error_out(message):
    sys.stderr.write("E: "+message+"\n")

def warning_out(message):
    sys.stderr.write("W: "+message+"\n")

def parse(cont, indent=1):
    if type(cont) is dict:
        return b"<<\n"+b"\n".join(
            [4 * indent * b" " + k.encode("utf8") + b" " + parse(v, indent+1)
             for k, v in cont.items()])+b"\n"+4*(indent-1)*b" "+b">>"
    elif type(cont) is int or type(cont) is float:
        return str(cont).encode("utf8")
    elif isinstance(cont, obj):
        return ("%d 0 R"%cont.identifier).encode("utf8")
    elif type(cont) is str:
        return cont.encode("utf8")
    elif type(cont) is bytes:
        return cont
    elif type(cont) is list:
        return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"

class obj(object):
    def __init__(self, content, stream=None):
        self.content = content
        self.stream = stream

    def tostring(self):
        if self.stream:
            return (
                ("%d 0 obj " % self.identifier).encode("utf8") +
                parse(self.content) +
                b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")
        else:
            return ("%d 0 obj "%self.identifier).encode("utf8")+parse(self.content)+b" endobj\n"

class pdfdoc(object):

    def __init__(self, version=3, title=None, author=None, creator=None,
                 producer=None, creationdate=None, moddate=None, subject=None,
                 keywords=None):
        self.version = version # default pdf version 1.3
        now = datetime.now()
        self.objects = []

        info = {}
        if title:
            info["/Title"] = "("+title+")"
        if author:
            info["/Author"] = "("+author+")"
        if creator:
            info["/Creator"] = "("+creator+")"
        if producer:
            info["/Producer"] = "("+producer+")"
        if creationdate:
            info["/CreationDate"] = "(D:"+creationdate.strftime("%Y%m%d%H%M%S")+")"
        else:
            info["/CreationDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
        if moddate:
            info["/ModDate"] = "(D:"+moddate.strftime("%Y%m%d%H%M%S")+")"
        else:
            info["/ModDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"
        if subject:
            info["/Subject"] = "("+subject+")"
        if keywords:
            info["/Keywords"] = "("+",".join(keywords)+")"

        self.info = obj(info)

        # create an incomplete pages object so that a /Parent entry can be
        # added to each page
        self.pages = obj({
            "/Type": "/Pages",
            "/Kids": [],
            "/Count": 0
        })

        self.catalog = obj({
            "/Pages": self.pages,
            "/Type": "/Catalog"
        })
        self.addobj(self.catalog)
        self.addobj(self.pages)

    def addobj(self, obj):
        newid = len(self.objects)+1
        obj.identifier = newid
        self.objects.append(obj)

    def addimage(self, color, width, height, imgformat, imgdata, pdf_x, pdf_y):
        if color == 'L':
            color = "/DeviceGray"
        elif color == 'RGB':
            color = "/DeviceRGB"
        else:
            error_out("unsupported color space: %s"%color)
            exit(1)

        if pdf_x < 3.00 or pdf_y < 3.00:
            warning_out("pdf width or height is below 3.00 - decrease the dpi")

        # either embed the whole jpeg or deflate the bitmap representation
        if imgformat is "JPEG":
            ofilter = [ "/DCTDecode" ]
        elif imgformat is "JPEG2000":
            ofilter = [ "/JPXDecode" ]
            self.version = 5 # jpeg2000 needs pdf 1.5
        else:
            ofilter = [ "/FlateDecode" ]
        image = obj({
            "/Type": "/XObject",
            "/Subtype": "/Image",
            "/Filter": ofilter,
            "/Width": width,
            "/Height": height,
            "/ColorSpace": color,
            # hardcoded as PIL doesnt provide bits for non-jpeg formats
            "/BitsPerComponent": 8,
            "/Length": len(imgdata)
        }, imgdata)

        text = ("q\n%f 0 0 %f 0 0 cm\n/Im0 Do\nQ"%(pdf_x, pdf_y)).encode('utf8')

        content = obj({
            "/Length": len(text)
        }, text)

        page = obj({
            "/Type": "/Page",
            "/Parent": self.pages,
            "/Resources": {
                "/XObject": {
                    "/Im0": image
                }
            },
            "/MediaBox": [0, 0, pdf_x, pdf_y],
            "/Contents": content
        })
        self.pages.content["/Kids"].append(page)
        self.pages.content["/Count"] += 1
        self.addobj(page)
        self.addobj(content)
        self.addobj(image)

    def tostring(self):
        # add info as last object
        self.addobj(self.info)

        xreftable = list()

        result = ("%%PDF-1.%d\n"%self.version).encode("utf8")

        xreftable.append(b"0000000000 65535 f \n")
        for o in self.objects:
            xreftable.append(("%010d 00000 n \n"%len(result)).encode("utf8"))
            result += o.tostring()

        xrefoffset = len(result)
        result += b"xref\n"
        result += ("0 %d\n"%len(xreftable)).encode("utf8")
        for x in xreftable:
            result += x
        result += b"trailer\n"
        result += parse({"/Size": len(xreftable), "/Info": self.info, "/Root": self.catalog})+b"\n"
        result += b"startxref\n"
        result += ("%d\n"%xrefoffset).encode("utf8")
        result += b"%%EOF\n"
        return result

def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=None,
            creationdate=None, moddate=None, subject=None, keywords=None,
            colorspace=None, verbose=False):

    pdf = pdfdoc(3, title, author, creator, producer, creationdate,
                 moddate, subject, keywords)

    for imfilename in images:
        debug_out("Reading %s"%imfilename, verbose)
        with open(imfilename, "rb") as im:
            rawdata = im.read()
            im.seek(0)
            try:
                imgdata = Image.open(im)
            except IOError as e:
                # test if it is a jpeg2000 image
                if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
                    error_out("cannot read input image (not jpeg2000)")
                    error_out("PIL: %s"%e)
                    exit(1)
                # image is jpeg2000
                width, height, ics = parsejp2(rawdata)
                imgformat = "JPEG2000"

                if dpi:
                    ndpi = dpi, dpi
                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
                else:
                    ndpi = (96, 96) # TODO: read real dpi
                    debug_out("input dpi = %d x %d"%ndpi, verbose)

                if colorspace:
                    color = colorspace
                    debug_out("input colorspace (forced) = %s"%(ics))
                else:
                    color = ics
                    debug_out("input colorspace = %s"%(ics), verbose)
            else:
                width, height = imgdata.size
                imgformat = imgdata.format

                if dpi:
                    ndpi = dpi, dpi
                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
                else:
                    ndpi = imgdata.info.get("dpi", (96, 96))
                    debug_out("input dpi = %d x %d"%ndpi, verbose)

                if colorspace:
                    color = colorspace
                    debug_out("input colorspace (forced) = %s"%(color), verbose)
                else:
                    color = imgdata.mode
                    debug_out("input colorspace = %s"%(color), verbose)

            debug_out("width x height = %d x %d"%(width,height), verbose)
            debug_out("imgformat = %s"%imgformat, verbose)

            # depending on the input format, determine whether to pass the raw
            # image or the zlib compressed color information
            if imgformat is "JPEG" or imgformat is "JPEG2000":
                if color == '1':
                    error_out("jpeg can't be monochrome")
                    exit(1)
                imgdata = rawdata
            else:
                # because we do not support /CCITTFaxDecode
                if color == '1':
                    debug_out("Converting colorspace 1 to L", verbose)
                    imgdata = imgdata.convert('L')
                    color = 'L'
                elif color in ("RGB", "L"):
                    debug_out("Colorspace is OK: %s"%color, verbose)
                else:
                    debug_out("Converting colorspace %s to RGB"%color, verbose)
                    imgdata = imgdata.convert('RGB')
                    color = imgdata.mode
                imgdata = zlib.compress(imgdata.tostring())

        # pdf units = 1/72 inch
        if not x and not y:
            pdf_x, pdf_y = 72.0*width/ndpi[0], 72.0*height/ndpi[1]
        elif not y:
            pdf_x, pdf_y = x, x*height/width
        elif not x:
            pdf_x, pdf_y = y*width/height, y

        pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)

    return pdf.tostring()


def positive_float(string):
    value = float(string)
    if value <= 0:
        msg = "%r is not positive"%string
        raise argparse.ArgumentTypeError(msg)
    return value

def valid_date(string):
    return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")

parser = argparse.ArgumentParser(
    description='Lossless conversion/embedding of images (in)to pdf')
parser.add_argument(
    'images', metavar='infile', type=str,
    nargs='+', help='input file(s)')
parser.add_argument(
    '-o', '--output', metavar='out', type=argparse.FileType('wb'),
    default=sys.stdout, help='output file (default: stdout)')
parser.add_argument(
    '-d', '--dpi', metavar='dpi', type=positive_float,
    help='dpi for pdf output (default: 96.0)')
parser.add_argument(
    '-x', metavar='pdf_x', type=positive_float,
    help='output width in points')
parser.add_argument(
    '-y', metavar='pdf_y', type=positive_float,
    help='output height in points')
parser.add_argument(
    '-t', '--title', metavar='title', type=str,
    help='title for metadata')
parser.add_argument(
    '-a', '--author', metavar='author', type=str,
    help='author for metadata')
parser.add_argument(
    '-c', '--creator', metavar='creator', type=str,
    help='creator for metadata')
parser.add_argument(
    '-p', '--producer', metavar='producer', type=str,
    help='producer for metadata')
parser.add_argument(
    '-r', '--creationdate', metavar='creationdate', type=valid_date,
    help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format')
parser.add_argument(
    '-m', '--moddate', metavar='moddate', type=valid_date,
    help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format')
parser.add_argument(
    '-s', '--subject', metavar='subject', type=str,
    help='subject for metadata')
parser.add_argument(
    '-k', '--keywords', metavar='kw', type=str, nargs='+',
    help='keywords for metadata')
parser.add_argument(
    '-C', '--colorspace', metavar='colorspace', type=str,
    help='force PIL colorspace (one of: RGB, L, 1)')
parser.add_argument(
    '-v', '--verbose', help='verbose mode', action="store_true")

def main(args=None):
    if args is None:
        args = sys.argv[1:]
    args = parser.parse_args(args)

    args.output.write(
        convert(
            args.images, args.dpi, args.x, args.y, args.title, args.author,
            args.creator, args.producer, args.creationdate, args.moddate,
            args.subject, args.keywords, args.colorspace, args.verbose))

if __name__ == '__main__':
    main()
Added python shebang Added python shebang to be able to launch script directly 2014-07-26 14:12:40 +00:00			`#!/usr/bin/env python2`

license change from GPL to LGPL 2014-03-30 06:10:12 +00:00			`# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>`
add copyright notice 2013-05-02 06:17:13 +00:00			`#`
license change from GPL to LGPL 2014-03-30 06:10:12 +00:00			`# This program is free software: you can redistribute it and/or`
			`# modify it under the terms of the GNU Lesser General Public`
			`# License as published by the Free Software Foundation, either`
			`# version 3 of the License, or (at your option) any later`
			`# version.`
add copyright notice 2013-05-02 06:17:13 +00:00			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
license change from GPL to LGPL 2014-03-30 06:10:12 +00:00			`# You should have received a copy of the GNU General Public`
			`# License along with this program. If not, see`
			`# <http://www.gnu.org/licenses/>.`
initial commit 2012-03-29 09:08:32 +00:00
			`import sys`
			`import zlib`
			`import argparse`
JPEG2000 support 2012-03-29 09:53:57 +00:00			`import struct`
Added tests for the package. 2014-03-01 04:51:53 +00:00			`from PIL import Image`
initial commit 2012-03-29 09:08:32 +00:00			`from datetime import datetime`
better jp2 parsing based on jpylyzer 2013-10-21 13:55:47 +00:00			`from jp2 import parsejp2`
initial commit 2012-03-29 09:08:32 +00:00
Added tests for the package. 2014-03-01 04:51:53 +00:00			`# XXX: Switch to use logging module.`
			`def debug_out(message, verbose=True):`
			`if verbose:`
			`sys.stderr.write("D: "+message+"\n")`

			`def error_out(message):`
			`sys.stderr.write("E: "+message+"\n")`

			`def warning_out(message):`
			`sys.stderr.write("W: "+message+"\n")`

initial commit 2012-03-29 09:08:32 +00:00			`def parse(cont, indent=1):`
			`if type(cont) is dict:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return b"<<\n"+b"\n".join(`
			`[4 * indent * b" " + k.encode("utf8") + b" " + parse(v, indent+1)`
			`for k, v in cont.items()])+b"\n"+4(indent-1)b" "+b">>"`
initial commit 2012-03-29 09:08:32 +00:00			`elif type(cont) is int or type(cont) is float:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return str(cont).encode("utf8")`
initial commit 2012-03-29 09:08:32 +00:00			`elif isinstance(cont, obj):`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return ("%d 0 R"%cont.identifier).encode("utf8")`
initial commit 2012-03-29 09:08:32 +00:00			`elif type(cont) is str:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return cont.encode("utf8")`
			`elif type(cont) is bytes:`
initial commit 2012-03-29 09:08:32 +00:00			`return cont`
			`elif type(cont) is list:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"`
initial commit 2012-03-29 09:08:32 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`class obj(object):`
initial commit 2012-03-29 09:08:32 +00:00			`def __init__(self, content, stream=None):`
			`self.content = content`
			`self.stream = stream`

major refactoring 2013-10-23 10:34:07 +00:00			`def tostring(self):`
initial commit 2012-03-29 09:08:32 +00:00			`if self.stream:`
Added tests for the package. 2014-03-01 04:51:53 +00:00			`return (`
add Python 3 support 2015-01-07 14:56:24 +00:00			`("%d 0 obj " % self.identifier).encode("utf8") +`
Added tests for the package. 2014-03-01 04:51:53 +00:00			`parse(self.content) +`
add Python 3 support 2015-01-07 14:56:24 +00:00			`b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")`
initial commit 2012-03-29 09:08:32 +00:00			`else:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`return ("%d 0 obj "%self.identifier).encode("utf8")+parse(self.content)+b" endobj\n"`
initial commit 2012-03-29 09:08:32 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`class pdfdoc(object):`
add /Pages reference to /Pages object each /page object 2012-06-15 14:59:31 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`def __init__(self, version=3, title=None, author=None, creator=None,`
			`producer=None, creationdate=None, moddate=None, subject=None,`
			`keywords=None):`
major refactoring 2013-10-23 10:34:07 +00:00			`self.version = version # default pdf version 1.3`
			`now = datetime.now()`
Added tests for the package. 2014-03-01 04:51:53 +00:00			`self.objects = []`
major refactoring 2013-10-23 10:34:07 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`info = {}`
major refactoring 2013-10-23 10:34:07 +00:00			`if title:`
			`info["/Title"] = "("+title+")"`
			`if author:`
			`info["/Author"] = "("+author+")"`
			`if creator:`
			`info["/Creator"] = "("+creator+")"`
			`if producer:`
			`info["/Producer"] = "("+producer+")"`
			`if creationdate:`
			`info["/CreationDate"] = "(D:"+creationdate.strftime("%Y%m%d%H%M%S")+")"`
initial commit 2012-03-29 09:08:32 +00:00			`else:`
major refactoring 2013-10-23 10:34:07 +00:00			`info["/CreationDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"`
			`if moddate:`
			`info["/ModDate"] = "(D:"+moddate.strftime("%Y%m%d%H%M%S")+")"`
			`else:`
			`info["/ModDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"`
			`if subject:`
			`info["/Subject"] = "("+subject+")"`
			`if keywords:`
			`info["/Keywords"] = "("+",".join(keywords)+")"`

			`self.info = obj(info)`

Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`# create an incomplete pages object so that a /Parent entry can be`
			`# added to each page`
major refactoring 2013-10-23 10:34:07 +00:00			`self.pages = obj({`
			`"/Type": "/Pages",`
			`"/Kids": [],`
			`"/Count": 0`
			`})`
JPEG2000 support 2012-03-29 09:53:57 +00:00
major refactoring 2013-10-23 10:34:07 +00:00			`self.catalog = obj({`
			`"/Pages": self.pages,`
			`"/Type": "/Catalog"`
			`})`
			`self.addobj(self.catalog)`
			`self.addobj(self.pages)`
add --verbose flag 2013-08-30 08:45:43 +00:00
major refactoring 2013-10-23 10:34:07 +00:00			`def addobj(self, obj):`
			`newid = len(self.objects)+1`
			`obj.identifier = newid`
			`self.objects.append(obj)`

add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00			`def addimage(self, color, width, height, imgformat, imgdata, pdf_x, pdf_y):`
initial commit 2012-03-29 09:08:32 +00:00			`if color == 'L':`
			`color = "/DeviceGray"`
			`elif color == 'RGB':`
			`color = "/DeviceRGB"`
			`else:`
add error and warning output 2013-10-23 06:49:43 +00:00			`error_out("unsupported color space: %s"%color)`
initial commit 2012-03-29 09:08:32 +00:00			`exit(1)`

check for minimum pdf page size 2013-10-23 06:49:59 +00:00			`if pdf_x < 3.00 or pdf_y < 3.00:`
			`warning_out("pdf width or height is below 3.00 - decrease the dpi")`

initial commit 2012-03-29 09:08:32 +00:00			`# either embed the whole jpeg or deflate the bitmap representation`
			`if imgformat is "JPEG":`
			`ofilter = [ "/DCTDecode" ]`
Add pillow 2.4.0 support Pillow 2.4.0 added support for JPEG2000 using OpenJPEG 2.0. Because Pillow calls the format JPEG2000 instead of JP2, we need to rename it to enable the optimized code path. Should still be backwards compatible. 2014-04-04 23:55:03 +00:00			`elif imgformat is "JPEG2000":`
JPEG2000 support 2012-03-29 09:53:57 +00:00			`ofilter = [ "/JPXDecode" ]`
major refactoring 2013-10-23 10:34:07 +00:00			`self.version = 5 # jpeg2000 needs pdf 1.5`
initial commit 2012-03-29 09:08:32 +00:00			`else:`
			`ofilter = [ "/FlateDecode" ]`
			`image = obj({`
			`"/Type": "/XObject",`
			`"/Subtype": "/Image",`
			`"/Filter": ofilter,`
			`"/Width": width,`
			`"/Height": height,`
			`"/ColorSpace": color,`
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`# hardcoded as PIL doesnt provide bits for non-jpeg formats`
			`"/BitsPerComponent": 8,`
initial commit 2012-03-29 09:08:32 +00:00			`"/Length": len(imgdata)`
			`}, imgdata)`

add Python 3 support 2015-01-07 14:56:24 +00:00			`text = ("q\n%f 0 0 %f 0 0 cm\n/Im0 Do\nQ"%(pdf_x, pdf_y)).encode('utf8')`
initial commit 2012-03-29 09:08:32 +00:00
			`content = obj({`
			`"/Length": len(text)`
			`}, text)`

			`page = obj({`
			`"/Type": "/Page",`
major refactoring 2013-10-23 10:34:07 +00:00			`"/Parent": self.pages,`
initial commit 2012-03-29 09:08:32 +00:00			`"/Resources": {`
			`"/XObject": {`
			`"/Im0": image`
			`}`
			`},`
			`"/MediaBox": [0, 0, pdf_x, pdf_y],`
			`"/Contents": content`
			`})`
major refactoring 2013-10-23 10:34:07 +00:00			`self.pages.content["/Kids"].append(page)`
			`self.pages.content["/Count"] += 1`
			`self.addobj(page)`
			`self.addobj(content)`
			`self.addobj(image)`

			`def tostring(self):`
			`# add info as last object`
			`self.addobj(self.info)`

			`xreftable = list()`

add Python 3 support 2015-01-07 14:56:24 +00:00			`result = ("%%PDF-1.%d\n"%self.version).encode("utf8")`
major refactoring 2013-10-23 10:34:07 +00:00
add Python 3 support 2015-01-07 14:56:24 +00:00			`xreftable.append(b"0000000000 65535 f \n")`
major refactoring 2013-10-23 10:34:07 +00:00			`for o in self.objects:`
add Python 3 support 2015-01-07 14:56:24 +00:00			`xreftable.append(("%010d 00000 n \n"%len(result)).encode("utf8"))`
major refactoring 2013-10-23 10:34:07 +00:00			`result += o.tostring()`

			`xrefoffset = len(result)`
add Python 3 support 2015-01-07 14:56:24 +00:00			`result += b"xref\n"`
			`result += ("0 %d\n"%len(xreftable)).encode("utf8")`
major refactoring 2013-10-23 10:34:07 +00:00			`for x in xreftable:`
			`result += x`
add Python 3 support 2015-01-07 14:56:24 +00:00			`result += b"trailer\n"`
			`result += parse({"/Size": len(xreftable), "/Info": self.info, "/Root": self.catalog})+b"\n"`
			`result += b"startxref\n"`
			`result += ("%d\n"%xrefoffset).encode("utf8")`
			`result += b"%%EOF\n"`
major refactoring 2013-10-23 10:34:07 +00:00			`return result`
initial commit 2012-03-29 09:08:32 +00:00
fix regression introduced by 9b35f5cf 2014-08-24 15:15:43 +00:00			`def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=None,`
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`creationdate=None, moddate=None, subject=None, keywords=None,`
			`colorspace=None, verbose=False):`
initial commit 2012-03-29 09:08:32 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`pdf = pdfdoc(3, title, author, creator, producer, creationdate,`
			`moddate, subject, keywords)`
initial commit 2012-03-29 09:08:32 +00:00
Avoid leaking file descriptors This change prevents img2pdf from opening all input files at once, which means it now works with thousands of input files. 2014-11-06 07:46:47 +00:00			`for imfilename in images:`
			`debug_out("Reading %s"%imfilename, verbose)`
Use "with" to open and close input files 2014-11-06 08:53:16 +00:00			`with open(imfilename, "rb") as im:`
			`rawdata = im.read()`
			`im.seek(0)`
			`try:`
			`imgdata = Image.open(im)`
			`except IOError as e:`
			`# test if it is a jpeg2000 image`
			`if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":`
			`error_out("cannot read input image (not jpeg2000)")`
			`error_out("PIL: %s"%e)`
			`exit(1)`
			`# image is jpeg2000`
			`width, height, ics = parsejp2(rawdata)`
			`imgformat = "JPEG2000"`

			`if dpi:`
			`ndpi = dpi, dpi`
			`debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)`
			`else:`
			`ndpi = (96, 96) # TODO: read real dpi`
			`debug_out("input dpi = %d x %d"%ndpi, verbose)`

			`if colorspace:`
			`color = colorspace`
			`debug_out("input colorspace (forced) = %s"%(ics))`
			`else:`
			`color = ics`
			`debug_out("input colorspace = %s"%(ics), verbose)`
major refactoring 2013-10-23 10:34:07 +00:00			`else:`
Use "with" to open and close input files 2014-11-06 08:53:16 +00:00			`width, height = imgdata.size`
			`imgformat = imgdata.format`

			`if dpi:`
			`ndpi = dpi, dpi`
			`debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)`
			`else:`
			`ndpi = imgdata.info.get("dpi", (96, 96))`
			`debug_out("input dpi = %d x %d"%ndpi, verbose)`

			`if colorspace:`
			`color = colorspace`
			`debug_out("input colorspace (forced) = %s"%(color), verbose)`
			`else:`
			`color = imgdata.mode`
			`debug_out("input colorspace = %s"%(color), verbose)`
major refactoring 2013-10-23 10:34:07 +00:00
fix problem with conversion of closed file 2014-11-10 09:13:52 +00:00			`debug_out("width x height = %d x %d"%(width,height), verbose)`
			`debug_out("imgformat = %s"%imgformat, verbose)`

			`# depending on the input format, determine whether to pass the raw`
			`# image or the zlib compressed color information`
			`if imgformat is "JPEG" or imgformat is "JPEG2000":`
			`if color == '1':`
			`error_out("jpeg can't be monochrome")`
			`exit(1)`
			`imgdata = rawdata`
Convert unrecognized colorspaces to RGB Instead of crashing on an unrecognized colorspace, we now do imgdata.convert('RGB'). 2014-11-06 07:47:42 +00:00			`else:`
fix problem with conversion of closed file 2014-11-10 09:13:52 +00:00			`# because we do not support /CCITTFaxDecode`
			`if color == '1':`
			`debug_out("Converting colorspace 1 to L", verbose)`
			`imgdata = imgdata.convert('L')`
			`color = 'L'`
			`elif color in ("RGB", "L"):`
			`debug_out("Colorspace is OK: %s"%color, verbose)`
			`else:`
			`debug_out("Converting colorspace %s to RGB"%color, verbose)`
			`imgdata = imgdata.convert('RGB')`
			`color = imgdata.mode`
			`imgdata = zlib.compress(imgdata.tostring())`
major refactoring 2013-10-23 10:34:07 +00:00
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00			`# pdf units = 1/72 inch`
fix regression introduced by 9b35f5cf 2014-08-24 15:15:43 +00:00			`if not x and not y:`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00			`pdf_x, pdf_y = 72.0width/ndpi[0], 72.0height/ndpi[1]`
fix regression introduced by 9b35f5cf 2014-08-24 15:15:43 +00:00			`elif not y:`
			`pdf_x, pdf_y = x, x*height/width`
			`elif not x:`
			`pdf_x, pdf_y = y*width/height, y`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00
			`pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)`
major refactoring 2013-10-23 10:34:07 +00:00
			`return pdf.tostring()`
initial commit 2012-03-29 09:08:32 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00
			`def positive_float(string):`
			`value = float(string)`
			`if value <= 0:`
			`msg = "%r is not positive"%string`
			`raise argparse.ArgumentTypeError(msg)`
			`return value`

			`def valid_date(string):`
			`return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")`

			`parser = argparse.ArgumentParser(`
			`description='Lossless conversion/embedding of images (in)to pdf')`
			`parser.add_argument(`
Avoid leaking file descriptors This change prevents img2pdf from opening all input files at once, which means it now works with thousands of input files. 2014-11-06 07:46:47 +00:00			`'images', metavar='infile', type=str,`
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`nargs='+', help='input file(s)')`
			`parser.add_argument(`
			`'-o', '--output', metavar='out', type=argparse.FileType('wb'),`
			`default=sys.stdout, help='output file (default: stdout)')`
			`parser.add_argument(`
			`'-d', '--dpi', metavar='dpi', type=positive_float,`
			`help='dpi for pdf output (default: 96.0)')`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00			`parser.add_argument(`
			`'-x', metavar='pdf_x', type=positive_float,`
			`help='output width in points')`
			`parser.add_argument(`
			`'-y', metavar='pdf_y', type=positive_float,`
			`help='output height in points')`
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`parser.add_argument(`
			`'-t', '--title', metavar='title', type=str,`
			`help='title for metadata')`
			`parser.add_argument(`
			`'-a', '--author', metavar='author', type=str,`
			`help='author for metadata')`
			`parser.add_argument(`
			`'-c', '--creator', metavar='creator', type=str,`
			`help='creator for metadata')`
			`parser.add_argument(`
			`'-p', '--producer', metavar='producer', type=str,`
			`help='producer for metadata')`
			`parser.add_argument(`
			`'-r', '--creationdate', metavar='creationdate', type=valid_date,`
			`help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format')`
			`parser.add_argument(`
			`'-m', '--moddate', metavar='moddate', type=valid_date,`
			`help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format')`
			`parser.add_argument(`
			`'-s', '--subject', metavar='subject', type=str,`
			`help='subject for metadata')`
			`parser.add_argument(`
			`'-k', '--keywords', metavar='kw', type=str, nargs='+',`
			`help='keywords for metadata')`
			`parser.add_argument(`
			`'-C', '--colorspace', metavar='colorspace', type=str,`
			`help='force PIL colorspace (one of: RGB, L, 1)')`
			`parser.add_argument(`
			`'-v', '--verbose', help='verbose mode', action="store_true")`

			`def main(args=None):`
			`if args is None:`
			`args = sys.argv[1:]`
			`args = parser.parse_args(args)`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`args.output.write(`
			`convert(`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 2014-08-04 15:25:07 +00:00			`args.images, args.dpi, args.x, args.y, args.title, args.author,`
Start of converting the module to a proper package. 2014-03-01 03:57:40 +00:00			`args.creator, args.producer, args.creationdate, args.moddate,`
			`args.subject, args.keywords, args.colorspace, args.verbose))`
allow running src/img2pdf.py standalone 2014-03-14 18:13:03 +00:00
			`if __name__ == '__main__':`
			`main()`