img2pdf/src/img2pdf.py

#!/usr/bin/env python2

# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

import sys
import zlib
import argparse
import struct
from PIL import Image
from datetime import datetime
from jp2 import parsejp2

# XXX: Switch to use logging module.
def debug_out(message, verbose=True):
    """Emit *message* on stderr behind a "D: " prefix.

    Nothing is written when *verbose* is falsy.
    """
    if not verbose:
        return
    line = "".join(("D: ", message, "\n"))
    sys.stderr.write(line)

def error_out(message):
    """Report an error: emit *message* on stderr behind an "E: " prefix."""
    line = "".join(("E: ", message, "\n"))
    sys.stderr.write(line)

def warning_out(message):
    """Report a warning: emit *message* on stderr behind a "W: " prefix."""
    line = "".join(("W: ", message, "\n"))
    sys.stderr.write(line)

def parse(cont, indent=1):
    """Serialize a Python value into PDF syntax and return it as bytes.

    Supported values:

    * dict        -- PDF dictionary; keys are text strings written verbatim
                     (e.g. "/Type"), entries are sorted by key and nested
                     dictionaries are indented by four spaces per level
    * bool        -- the PDF keywords ``true`` / ``false``
    * int, float  -- written with str()
    * str         -- written verbatim, UTF-8 encoded (caller supplies any
                     PDF delimiters such as "/" or "(...)")
    * bytes       -- written verbatim
    * list, tuple -- PDF array
    * obj         -- indirect reference "<identifier> 0 R"

    *indent* is the nesting depth used to indent dictionary entries.

    Raises TypeError for any other value instead of silently returning
    None (which used to surface later as an unrelated concatenation error).
    """
    # bool has to be tested before int because bool is a subclass of int
    if isinstance(cont, bool):
        return b"true" if cont else b"false"
    if isinstance(cont, dict):
        pad = 4 * indent * b" "
        entries = [pad + k.encode("utf8") + b" " + parse(v, indent+1)
                   for k, v in sorted(cont.items())]
        return (b"<<\n" + b"\n".join(entries) + b"\n" +
                4 * (indent-1) * b" " + b">>")
    if isinstance(cont, (int, float)):
        return str(cont).encode("utf8")
    # str is tested before bytes so that Python 2 (where both are the same
    # type) keeps taking the encode() path
    if isinstance(cont, str):
        return cont.encode("utf8")
    if isinstance(cont, bytes):
        return cont
    if isinstance(cont, (list, tuple)):
        return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]"
    if isinstance(cont, obj):
        return ("%d 0 R" % cont.identifier).encode("utf8")
    raise TypeError(
        "cannot serialize %r (type %s) to PDF" % (cont, type(cont).__name__))

class obj(object):
    """A PDF indirect object: a value plus an optional stream payload.

    content -- the value serialized with parse() (usually a dict)
    stream  -- raw bytes of the stream body, or None for a plain object

    The ``identifier`` attribute (the PDF object number) is not set here;
    pdfdoc.addobj assigns it when the object is registered with a document,
    so tostring() must only be called afterwards.
    """

    def __init__(self, content, stream=None):
        self.content = content
        self.stream = stream

    def tostring(self):
        """Return the serialized "N 0 obj ... endobj" form as bytes."""
        header = ("%d 0 obj " % self.identifier).encode("utf8")
        body = parse(self.content)
        # Compare against None rather than testing truthiness: an empty
        # stream (b"") is still a stream and needs its stream/endstream
        # keywords, otherwise the object degrades to a bare dictionary.
        if self.stream is None:
            return header + body + b" endobj\n"
        return (header + body +
                b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")

class pdfdoc(object):
    """A minimal PDF document in which every page shows one image.

    Objects are collected in ``self.objects`` in the order they are
    registered; the position in that list (plus one) is the PDF object
    number. The catalog and the page tree are registered on construction,
    each addimage() call registers a page, its content stream and its
    image, and the info dictionary is registered by tostring().
    """

    def __init__(self, version=3, title=None, author=None, creator=None,
                 producer=None, creationdate=None, moddate=None, subject=None,
                 keywords=None, nodate=False):
        """Create an empty document.

        version      -- minor PDF version (3 means PDF 1.3)
        title, author, creator, producer, subject
                     -- optional metadata strings for the info dictionary
        creationdate, moddate
                     -- optional datetime objects; when omitted the current
                        time is used unless *nodate* is true
        keywords     -- optional list of strings, joined with ","
        nodate       -- if true, do not add default timestamps
        """
        self.version = version # default pdf version 1.3
        now = datetime.now()
        self.objects = []

        info = {}
        if title:
            info["/Title"] = self._literal(title)
        if author:
            info["/Author"] = self._literal(author)
        if creator:
            info["/Creator"] = self._literal(creator)
        if producer:
            info["/Producer"] = self._literal(producer)
        if creationdate:
            info["/CreationDate"] = self._pdfdate(creationdate)
        elif not nodate:
            info["/CreationDate"] = self._pdfdate(now)
        if moddate:
            info["/ModDate"] = self._pdfdate(moddate)
        elif not nodate:
            info["/ModDate"] = self._pdfdate(now)
        if subject:
            info["/Subject"] = self._literal(subject)
        if keywords:
            info["/Keywords"] = self._literal(",".join(keywords))

        self.info = obj(info)

        # create an incomplete pages object so that a /Parent entry can be
        # added to each page
        self.pages = obj({
            "/Type": "/Pages",
            "/Kids": [],
            "/Count": 0
        })

        self.catalog = obj({
            "/Pages": self.pages,
            "/Type": "/Catalog"
        })
        self.addobj(self.catalog)
        self.addobj(self.pages)

    @staticmethod
    def _literal(text):
        """Return *text* as a PDF literal string "(...)".

        Backslashes and parentheses are escaped so that metadata containing
        them cannot terminate the string early or corrupt the file.
        """
        escaped = (text.replace("\\", "\\\\")
                       .replace("(", "\\(")
                       .replace(")", "\\)"))
        return "(" + escaped + ")"

    @staticmethod
    def _pdfdate(moment):
        """Return the datetime *moment* as a PDF date string "(D:...)"."""
        return "(D:" + moment.strftime("%Y%m%d%H%M%S") + ")"

    def addobj(self, obj):
        """Register *obj* and give it the next free object number."""
        newid = len(self.objects)+1
        obj.identifier = newid
        self.objects.append(obj)

    def addimage(self, color, width, height, imgformat, imgdata, pdf_x, pdf_y):
        """Append a page of pdf_x by pdf_y points filled with one image.

        color      -- 'L', 'RGB', 'CMYK' or 'CMYK;I' (inverted CMYK)
        width, height
                   -- image size in pixels
        imgformat  -- "JPEG" and "JPEG2000" select /DCTDecode and
                      /JPXDecode; anything else is treated as
                      zlib-compressed samples (/FlateDecode)
        imgdata    -- bytes of the image stream, already in that encoding
        pdf_x, pdf_y
                   -- page (and drawn image) size in PDF units

        Prints an error and exits with status 1 for any other *color*.
        """
        if color == 'L':
            colorspace = "/DeviceGray"
        elif color == 'RGB':
            colorspace = "/DeviceRGB"
        elif color == 'CMYK' or color == 'CMYK;I':
            colorspace = "/DeviceCMYK"
        else:
            error_out("unsupported color space: %s"%color)
            sys.exit(1)

        if pdf_x < 3.00 or pdf_y < 3.00:
            warning_out("pdf width or height is below 3.00 - decrease the dpi")

        # either embed the whole jpeg or deflate the bitmap representation
        # (compare by value: identity of equal strings is not guaranteed)
        if imgformat == "JPEG":
            ofilter = [ "/DCTDecode" ]
        elif imgformat == "JPEG2000":
            ofilter = [ "/JPXDecode" ]
            self.version = 5 # jpeg2000 needs pdf 1.5
        else:
            ofilter = [ "/FlateDecode" ]
        image = obj({
            "/Type": "/XObject",
            "/Subtype": "/Image",
            "/Filter": ofilter,
            "/Width": width,
            "/Height": height,
            "/ColorSpace": colorspace,
            # hardcoded as PIL doesnt provide bits for non-jpeg formats
            "/BitsPerComponent": 8,
            "/Length": len(imgdata)
        }, imgdata)

        if color == 'CMYK;I':
            # Inverts all four channels
            image.content['/Decode'] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]

        # content stream: scale the unit square to the page size and paint
        # the image into it
        text = ("q\n%f 0 0 %f 0 0 cm\n/Im0 Do\nQ"%(pdf_x, pdf_y)).encode('utf8')

        content = obj({
            "/Length": len(text)
        }, text)

        page = obj({
            "/Type": "/Page",
            "/Parent": self.pages,
            "/Resources": {
                "/XObject": {
                    "/Im0": image
                }
            },
            "/MediaBox": [0, 0, pdf_x, pdf_y],
            "/Contents": content
        })
        self.pages.content["/Kids"].append(page)
        self.pages.content["/Count"] += 1
        self.addobj(page)
        self.addobj(content)
        self.addobj(image)

    def tostring(self):
        """Serialize the whole document and return it as bytes.

        The info dictionary is registered as the last object on the first
        call; later calls reuse that registration so repeated calls do not
        pile up duplicate info objects.
        """
        if not any(o is self.info for o in self.objects):
            self.addobj(self.info)

        header = ("%%PDF-1.%d\n"%self.version).encode("utf8")
        chunks = [header]
        offset = len(header)

        # entry 0 of the cross-reference table is the mandatory free entry;
        # every other entry holds the byte offset of one object
        xreftable = [b"0000000000 65535 f \n"]
        for o in self.objects:
            xreftable.append(("%010d 00000 n \n"%offset).encode("utf8"))
            data = o.tostring()
            chunks.append(data)
            offset += len(data)

        xrefoffset = offset
        chunks.append(b"xref\n")
        chunks.append(("0 %d\n"%len(xreftable)).encode("utf8"))
        chunks.extend(xreftable)
        chunks.append(b"trailer\n")
        chunks.append(parse({"/Size": len(xreftable), "/Info": self.info,
                             "/Root": self.catalog})+b"\n")
        chunks.append(b"startxref\n")
        chunks.append(("%d\n"%xrefoffset).encode("utf8"))
        chunks.append(b"%%EOF\n")
        # join once instead of growing a bytes object with += per object
        return b"".join(chunks)

def convert(images, dpi=None, x=None, y=None, title=None, author=None,
            creator=None, producer=None, creationdate=None, moddate=None,
            subject=None, keywords=None, colorspace=None, verbose=False,
            nodate=False):
    """Convert image files into one PDF and return the PDF as bytes.

    Every input file becomes one page. JPEG and JPEG2000 files are embedded
    unchanged; all other formats are decoded with PIL and their samples are
    zlib-compressed.

    images     -- iterable of input file names
    dpi        -- if truthy, used for both axes instead of the image's own
                  resolution (which defaults to 96 when unknown)
    x, y       -- page width / height in PDF units (1/72 inch); if only one
                  is given the other follows from the aspect ratio, if
                  neither is given the size is derived from pixels and dpi
    title, author, creator, producer, creationdate, moddate, subject,
    keywords   -- metadata forwarded to pdfdoc
    colorspace -- if truthy, used instead of the detected colorspace
    verbose    -- if true, print debug messages to stderr
    nodate     -- forwarded to pdfdoc; if true no default timestamps are
                  written

    Prints an error and exits with status 1 when a file can be read neither
    by PIL nor as JPEG2000, or when a JPEG is forced to colorspace '1'.
    """

    pdf = pdfdoc(3, title, author, creator, producer, creationdate,
                 moddate, subject, keywords, nodate)

    for imfilename in images:
        debug_out("Reading %s"%imfilename, verbose)
        with open(imfilename, "rb") as im:
            rawdata = im.read()
            im.seek(0)
            try:
                imgdata = Image.open(im)
            except IOError as e:
                # test if it is a jpeg2000 image; the signature must be a
                # bytes literal, a text literal never equals bytes on
                # Python 3
                if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
                    error_out("cannot read input image (not jpeg2000)")
                    error_out("PIL: %s"%e)
                    sys.exit(1)
                # image is jpeg2000
                width, height, ics = parsejp2(rawdata)
                imgformat = "JPEG2000"

                if dpi:
                    ndpi = dpi, dpi
                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
                else:
                    ndpi = (96, 96) # TODO: read real dpi
                    debug_out("input dpi = %d x %d"%ndpi, verbose)

                if colorspace:
                    color = colorspace
                    debug_out("input colorspace (forced) = %s"%(color), verbose)
                else:
                    color = ics
                    debug_out("input colorspace = %s"%(ics), verbose)
            else:
                width, height = imgdata.size
                imgformat = imgdata.format

                if dpi:
                    ndpi = dpi, dpi
                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
                else:
                    ndpi = imgdata.info.get("dpi", (96, 96))
                    debug_out("input dpi = %d x %d"%ndpi, verbose)

                if colorspace:
                    color = colorspace
                    debug_out("input colorspace (forced) = %s"%(color), verbose)
                else:
                    color = imgdata.mode
                    if color == "CMYK" and imgformat == "JPEG":
                        # Adobe inverts CMYK JPEGs for some reason, and others
                        # have followed suit as well. Some software assumes the
                        # JPEG is inverted if the Adobe tag (APP14), while other
                        # software assumes all CMYK JPEGs are inverted. I don't
                        # have enough experience with these to know which is
                        # better for images currently in the wild, so I'm going
                        # with the first approach for now.
                        if "adobe" in imgdata.info:
                            color = "CMYK;I"
                    debug_out("input colorspace = %s"%(color), verbose)

            debug_out("width x height = %d x %d"%(width,height), verbose)
            debug_out("imgformat = %s"%imgformat, verbose)

            # depending on the input format, determine whether to pass the raw
            # image or the zlib compressed color information
            if imgformat == "JPEG" or imgformat == "JPEG2000":
                if color == '1':
                    error_out("jpeg can't be monochrome")
                    sys.exit(1)
                imgdata = rawdata
            else:
                # because we do not support /CCITTFaxDecode
                if color == '1':
                    debug_out("Converting colorspace 1 to L", verbose)
                    imgdata = imgdata.convert('L')
                    color = 'L'
                elif color in ("RGB", "L", "CMYK", "CMYK;I"):
                    debug_out("Colorspace is OK: %s"%color, verbose)
                else:
                    debug_out("Converting colorspace %s to RGB"%color, verbose)
                    imgdata = imgdata.convert('RGB')
                    color = imgdata.mode
                # tostring() was renamed to tobytes() in Pillow and later
                # removed; fall back to it only for old PIL releases
                dump = getattr(imgdata, "tobytes", None) or imgdata.tostring
                imgdata = zlib.compress(dump())

        # pdf units = 1/72 inch
        if not x and not y:
            pdf_x, pdf_y = 72.0*width/ndpi[0], 72.0*height/ndpi[1]
        elif not y:
            # float() keeps the ratio exact for integer x under Python 2
            pdf_x, pdf_y = x, x*float(height)/width
        elif not x:
            pdf_x, pdf_y = y*float(width)/height, y
        else:
            pdf_x = x
            pdf_y = y

        pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)

    return pdf.tostring()


def positive_float(string):
    """argparse type: parse *string* as a finite float greater than zero.

    Returns the float. Raises argparse.ArgumentTypeError when the value is
    zero, negative, NaN or infinite; a string that is not a number at all
    makes float() raise ValueError, which argparse reports as well.
    """
    value = float(string)
    if value <= 0:
        msg = "%r is not positive"%string
        raise argparse.ArgumentTypeError(msg)
    # NaN fails every comparison, so it slips past the check above; the
    # chained comparison below is False for both NaN and infinity
    if not 0 < value < float("inf"):
        msg = "%r is not a finite number"%string
        raise argparse.ArgumentTypeError(msg)
    return value

def valid_date(string):
    """argparse type: turn a YYYY-MM-DDTHH:MM:SS timestamp into a datetime.

    datetime.strptime raises ValueError when *string* does not match.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    parsed = datetime.strptime(string, timestamp_format)
    return parsed

# Command line interface. The option destinations defined here (images,
# output, dpi, x, y, title, ...) are the attributes main() reads back.
parser = argparse.ArgumentParser(
    description="Lossless conversion/embedding of images (in)to pdf",
)

# --- input and output -------------------------------------------------------
parser.add_argument(
    "images",
    nargs="+",
    type=str,
    metavar="infile",
    help="input file(s)",
)
parser.add_argument(
    "-o", "--output",
    type=argparse.FileType("wb"),
    default=sys.stdout,
    metavar="out",
    help="output file (default: stdout)",
)

# --- page geometry ----------------------------------------------------------
parser.add_argument(
    "-d", "--dpi",
    type=positive_float,
    metavar="dpi",
    help="dpi for pdf output (default: 96.0)",
)
parser.add_argument(
    "-x",
    type=positive_float,
    metavar="pdf_x",
    help="output width in points",
)
parser.add_argument(
    "-y",
    type=positive_float,
    metavar="pdf_y",
    help="output height in points",
)

# --- document metadata ------------------------------------------------------
parser.add_argument(
    "-t", "--title",
    type=str,
    metavar="title",
    help="title for metadata",
)
parser.add_argument(
    "-a", "--author",
    type=str,
    metavar="author",
    help="author for metadata",
)
parser.add_argument(
    "-c", "--creator",
    type=str,
    metavar="creator",
    help="creator for metadata",
)
parser.add_argument(
    "-p", "--producer",
    type=str,
    metavar="producer",
    help="producer for metadata",
)
parser.add_argument(
    "-r", "--creationdate",
    type=valid_date,
    metavar="creationdate",
    help="creation date for metadata in YYYY-MM-DDTHH:MM:SS format",
)
parser.add_argument(
    "-m", "--moddate",
    type=valid_date,
    metavar="moddate",
    help="modification date for metadata in YYYY-MM-DDTHH:MM:SS format",
)
parser.add_argument(
    "-s", "--subject",
    type=str,
    metavar="subject",
    help="subject for metadata",
)
parser.add_argument(
    "-k", "--keywords",
    nargs="+",
    type=str,
    metavar="kw",
    help="keywords for metadata",
)

# --- conversion behaviour ---------------------------------------------------
parser.add_argument(
    "-C", "--colorspace",
    type=str,
    metavar="colorspace",
    help="force PIL colorspace (one of: RGB, L, 1, CMYK, CMYK;I)",
)
parser.add_argument(
    "-D", "--nodate",
    action="store_true",
    help="do not add timestamps",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="verbose mode",
)

def main(args=None):
    """Command line entry point: parse *args*, convert, write the PDF.

    args -- list of command line arguments without the program name;
            defaults to sys.argv[1:]

    NOTE(review): args.nodate is parsed by the option parser but is not
    forwarded to convert() here.
    """
    if args is None:
        args = sys.argv[1:]
    args = parser.parse_args(args)

    pdfdata = convert(
        args.images, args.dpi, args.x, args.y, args.title, args.author,
        args.creator, args.producer, args.creationdate, args.moddate,
        args.subject, args.keywords, args.colorspace, args.verbose)

    # convert() returns bytes. On Python 3 sys.stdout (the default output) is
    # a text stream that rejects bytes, so write to its underlying binary
    # buffer; real binary files and Python 2 streams have no "buffer"
    # attribute and are used as they are.
    out = getattr(args.output, "buffer", args.output)
    out.write(pdfdata)

# allow running this file directly as a script
if "__main__" == __name__:
    main()
Added python shebang Added python shebang to be able to launch script directly 10 years ago			`#!/usr/bin/env python2`

license change from GPL to LGPL 10 years ago			`# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>`
add copyright notice 11 years ago			`#`
license change from GPL to LGPL 10 years ago			`# This program is free software: you can redistribute it and/or`
			`# modify it under the terms of the GNU Lesser General Public`
			`# License as published by the Free Software Foundation, either`
			`# version 3 of the License, or (at your option) any later`
			`# version.`
add copyright notice 11 years ago			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
license change from GPL to LGPL 10 years ago			`# You should have received a copy of the GNU General Public`
			`# License along with this program. If not, see`
			`# <http://www.gnu.org/licenses/>.`
initial commit 12 years ago
			`import sys`
			`import zlib`
			`import argparse`
JPEG2000 support 12 years ago			`import struct`
Added tests for the package. 10 years ago			`from PIL import Image`
initial commit 12 years ago			`from datetime import datetime`
better jp2 parsing based on jpylyzer 11 years ago			`from jp2 import parsejp2`
initial commit 12 years ago
Added tests for the package. 10 years ago			`# XXX: Switch to use logging module.`
			`def debug_out(message, verbose=True):`
			`if verbose:`
			`sys.stderr.write("D: "+message+"\n")`

			`def error_out(message):`
			`sys.stderr.write("E: "+message+"\n")`

			`def warning_out(message):`
			`sys.stderr.write("W: "+message+"\n")`

initial commit 12 years ago			`def parse(cont, indent=1):`
			`if type(cont) is dict:`
add Python 3 support 9 years ago			`return b"<<\n"+b"\n".join(`
			`[4 * indent * b" " + k.encode("utf8") + b" " + parse(v, indent+1)`
make output reproducible by sorting and --nodate option 9 years ago			`for k, v in sorted(cont.items())])+b"\n"+4*(indent-1)*b" "+b">>"`
initial commit 12 years ago			`elif type(cont) is int or type(cont) is float:`
add Python 3 support 9 years ago			`return str(cont).encode("utf8")`
initial commit 12 years ago			`elif isinstance(cont, obj):`
add Python 3 support 9 years ago			`return ("%d 0 R"%cont.identifier).encode("utf8")`
initial commit 12 years ago			`elif type(cont) is str:`
add Python 3 support 9 years ago			`return cont.encode("utf8")`
			`elif type(cont) is bytes:`
initial commit 12 years ago			`return cont`
			`elif type(cont) is list:`
add Python 3 support 9 years ago			`return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"`
initial commit 12 years ago
Start of converting the module to a proper package. 10 years ago			`class obj(object):`
initial commit 12 years ago			`def __init__(self, content, stream=None):`
			`self.content = content`
			`self.stream = stream`

major refactoring 11 years ago			`def tostring(self):`
initial commit 12 years ago			`if self.stream:`
Added tests for the package. 10 years ago			`return (`
add Python 3 support 9 years ago			`("%d 0 obj " % self.identifier).encode("utf8") +`
Added tests for the package. 10 years ago			`parse(self.content) +`
add Python 3 support 9 years ago			`b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")`
initial commit 12 years ago			`else:`
add Python 3 support 9 years ago			`return ("%d 0 obj "%self.identifier).encode("utf8")+parse(self.content)+b" endobj\n"`
initial commit 12 years ago
Start of converting the module to a proper package. 10 years ago			`class pdfdoc(object):`
add /Pages reference to /Pages object each /page object 12 years ago
Start of converting the module to a proper package. 10 years ago			`def __init__(self, version=3, title=None, author=None, creator=None,`
			`producer=None, creationdate=None, moddate=None, subject=None,`
make output reproducible by sorting and --nodate option 9 years ago			`keywords=None, nodate=False):`
major refactoring 11 years ago			`self.version = version # default pdf version 1.3`
			`now = datetime.now()`
Added tests for the package. 10 years ago			`self.objects = []`
major refactoring 11 years ago
Start of converting the module to a proper package. 10 years ago			`info = {}`
major refactoring 11 years ago			`if title:`
			`info["/Title"] = "("+title+")"`
			`if author:`
			`info["/Author"] = "("+author+")"`
			`if creator:`
			`info["/Creator"] = "("+creator+")"`
			`if producer:`
			`info["/Producer"] = "("+producer+")"`
			`if creationdate:`
			`info["/CreationDate"] = "(D:"+creationdate.strftime("%Y%m%d%H%M%S")+")"`
make output reproducible by sorting and --nodate option 9 years ago			`elif not nodate:`
major refactoring 11 years ago			`info["/CreationDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"`
			`if moddate:`
			`info["/ModDate"] = "(D:"+moddate.strftime("%Y%m%d%H%M%S")+")"`
make output reproducible by sorting and --nodate option 9 years ago			`elif not nodate:`
major refactoring 11 years ago			`info["/ModDate"] = "(D:"+now.strftime("%Y%m%d%H%M%S")+")"`
			`if subject:`
			`info["/Subject"] = "("+subject+")"`
			`if keywords:`
			`info["/Keywords"] = "("+",".join(keywords)+")"`

			`self.info = obj(info)`

Start of converting the module to a proper package. 10 years ago			`# create an incomplete pages object so that a /Parent entry can be`
			`# added to each page`
major refactoring 11 years ago			`self.pages = obj({`
			`"/Type": "/Pages",`
			`"/Kids": [],`
			`"/Count": 0`
			`})`
JPEG2000 support 12 years ago
major refactoring 11 years ago			`self.catalog = obj({`
			`"/Pages": self.pages,`
			`"/Type": "/Catalog"`
			`})`
			`self.addobj(self.catalog)`
			`self.addobj(self.pages)`
add --verbose flag 11 years ago
major refactoring 11 years ago			`def addobj(self, obj):`
			`newid = len(self.objects)+1`
			`obj.identifier = newid`
			`self.objects.append(obj)`

add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago			`def addimage(self, color, width, height, imgformat, imgdata, pdf_x, pdf_y):`
initial commit 12 years ago			`if color == 'L':`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`colorspace = "/DeviceGray"`
initial commit 12 years ago			`elif color == 'RGB':`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`colorspace = "/DeviceRGB"`
			`elif color == 'CMYK' or color == 'CMYK;I':`
			`colorspace = "/DeviceCMYK"`
initial commit 12 years ago			`else:`
add error and warning output 11 years ago			`error_out("unsupported color space: %s"%color)`
initial commit 12 years ago			`exit(1)`

check for minimum pdf page size 11 years ago			`if pdf_x < 3.00 or pdf_y < 3.00:`
			`warning_out("pdf width or height is below 3.00 - decrease the dpi")`

initial commit 12 years ago			`# either embed the whole jpeg or deflate the bitmap representation`
			`if imgformat is "JPEG":`
			`ofilter = [ "/DCTDecode" ]`
Add pillow 2.4.0 support Pillow 2.4.0 added support for JPEG2000 using OpenJPEG 2.0. Because Pillow calls the format JPEG2000 instead of JP2, we need to rename it to enable the optimized code path. Should still be backwards compatible. 10 years ago			`elif imgformat is "JPEG2000":`
JPEG2000 support 12 years ago			`ofilter = [ "/JPXDecode" ]`
major refactoring 11 years ago			`self.version = 5 # jpeg2000 needs pdf 1.5`
initial commit 12 years ago			`else:`
			`ofilter = [ "/FlateDecode" ]`
			`image = obj({`
			`"/Type": "/XObject",`
			`"/Subtype": "/Image",`
			`"/Filter": ofilter,`
			`"/Width": width,`
			`"/Height": height,`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`"/ColorSpace": colorspace,`
Start of converting the module to a proper package. 10 years ago			`# hardcoded as PIL doesnt provide bits for non-jpeg formats`
			`"/BitsPerComponent": 8,`
initial commit 12 years ago			`"/Length": len(imgdata)`
			`}, imgdata)`

Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`if color == 'CMYK;I':`
			`# Inverts all four channels`
			`image.content['/Decode'] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]`

add Python 3 support 9 years ago			`text = ("q\n%f 0 0 %f 0 0 cm\n/Im0 Do\nQ"%(pdf_x, pdf_y)).encode('utf8')`
initial commit 12 years ago
			`content = obj({`
			`"/Length": len(text)`
			`}, text)`

			`page = obj({`
			`"/Type": "/Page",`
major refactoring 11 years ago			`"/Parent": self.pages,`
initial commit 12 years ago			`"/Resources": {`
			`"/XObject": {`
			`"/Im0": image`
			`}`
			`},`
			`"/MediaBox": [0, 0, pdf_x, pdf_y],`
			`"/Contents": content`
			`})`
major refactoring 11 years ago			`self.pages.content["/Kids"].append(page)`
			`self.pages.content["/Count"] += 1`
			`self.addobj(page)`
			`self.addobj(content)`
			`self.addobj(image)`

			`def tostring(self):`
			`# add info as last object`
			`self.addobj(self.info)`

			`xreftable = list()`

add Python 3 support 9 years ago			`result = ("%%PDF-1.%d\n"%self.version).encode("utf8")`
major refactoring 11 years ago
add Python 3 support 9 years ago			`xreftable.append(b"0000000000 65535 f \n")`
major refactoring 11 years ago			`for o in self.objects:`
add Python 3 support 9 years ago			`xreftable.append(("%010d 00000 n \n"%len(result)).encode("utf8"))`
major refactoring 11 years ago			`result += o.tostring()`

			`xrefoffset = len(result)`
add Python 3 support 9 years ago			`result += b"xref\n"`
			`result += ("0 %d\n"%len(xreftable)).encode("utf8")`
major refactoring 11 years ago			`for x in xreftable:`
			`result += x`
add Python 3 support 9 years ago			`result += b"trailer\n"`
			`result += parse({"/Size": len(xreftable), "/Info": self.info, "/Root": self.catalog})+b"\n"`
			`result += b"startxref\n"`
			`result += ("%d\n"%xrefoffset).encode("utf8")`
			`result += b"%%EOF\n"`
major refactoring 11 years ago			`return result`
initial commit 12 years ago
make 2nd, 3rd and 4th argument optional by supplying None as default 9 years ago			`def convert(images, dpi=None, x=None, y=None, title=None, author=None,`
			`creator=None, producer=None, creationdate=None, moddate=None,`
			`subject=None, keywords=None, colorspace=None, verbose=False):`
initial commit 12 years ago
Start of converting the module to a proper package. 10 years ago			`pdf = pdfdoc(3, title, author, creator, producer, creationdate,`
			`moddate, subject, keywords)`
initial commit 12 years ago
Avoid leaking file descriptors This change prevents img2pdf from opening all input files at once, which means it now works with thousands of input files. 10 years ago			`for imfilename in images:`
			`debug_out("Reading %s"%imfilename, verbose)`
Use "with" to open and close input files 10 years ago			`with open(imfilename, "rb") as im:`
			`rawdata = im.read()`
			`im.seek(0)`
			`try:`
			`imgdata = Image.open(im)`
			`except IOError as e:`
			`# test if it is a jpeg2000 image`
			`if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":`
			`error_out("cannot read input image (not jpeg2000)")`
			`error_out("PIL: %s"%e)`
			`exit(1)`
			`# image is jpeg2000`
			`width, height, ics = parsejp2(rawdata)`
			`imgformat = "JPEG2000"`

			`if dpi:`
			`ndpi = dpi, dpi`
			`debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)`
			`else:`
			`ndpi = (96, 96) # TODO: read real dpi`
			`debug_out("input dpi = %d x %d"%ndpi, verbose)`

			`if colorspace:`
			`color = colorspace`
			`debug_out("input colorspace (forced) = %s"%(ics))`
			`else:`
			`color = ics`
			`debug_out("input colorspace = %s"%(ics), verbose)`
major refactoring 11 years ago			`else:`
Use "with" to open and close input files 10 years ago			`width, height = imgdata.size`
			`imgformat = imgdata.format`

			`if dpi:`
			`ndpi = dpi, dpi`
			`debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)`
			`else:`
			`ndpi = imgdata.info.get("dpi", (96, 96))`
			`debug_out("input dpi = %d x %d"%ndpi, verbose)`

			`if colorspace:`
			`color = colorspace`
			`debug_out("input colorspace (forced) = %s"%(color), verbose)`
			`else:`
			`color = imgdata.mode`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`if color == "CMYK" and imgformat == "JPEG":`
			`# Adobe inverts CMYK JPEGs for some reason, and others`
			`# have followed suit as well. Some software assumes the`
			`# JPEG is inverted if the Adobe tag (APP14), while other`
			`# software assumes all CMYK JPEGs are inverted. I don't`
			`# have enough experience with these to know which is`
			`# better for images currently in the wild, so I'm going`
			`# with the first approach for now.`
			`if "adobe" in imgdata.info:`
			`color = "CMYK;I"`
Use "with" to open and close input files 10 years ago			`debug_out("input colorspace = %s"%(color), verbose)`
major refactoring 11 years ago
fix problem with conversion of closed file 10 years ago			`debug_out("width x height = %d x %d"%(width,height), verbose)`
			`debug_out("imgformat = %s"%imgformat, verbose)`

			`# depending on the input format, determine whether to pass the raw`
			`# image or the zlib compressed color information`
			`if imgformat is "JPEG" or imgformat is "JPEG2000":`
			`if color == '1':`
			`error_out("jpeg can't be monochrome")`
			`exit(1)`
			`imgdata = rawdata`
Convert unrecognized colorspaces to RGB Instead of crashing on an unrecognized colorspace, we now do imgdata.convert('RGB'). 10 years ago			`else:`
fix problem with conversion of closed file 10 years ago			`# because we do not support /CCITTFaxDecode`
			`if color == '1':`
			`debug_out("Converting colorspace 1 to L", verbose)`
			`imgdata = imgdata.convert('L')`
			`color = 'L'`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`elif color in ("RGB", "L", "CMYK", "CMYK;I"):`
fix problem with conversion of closed file 10 years ago			`debug_out("Colorspace is OK: %s"%color, verbose)`
			`else:`
			`debug_out("Converting colorspace %s to RGB"%color, verbose)`
			`imgdata = imgdata.convert('RGB')`
			`color = imgdata.mode`
			`imgdata = zlib.compress(imgdata.tostring())`
major refactoring 11 years ago
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago			`# pdf units = 1/72 inch`
fix regression introduced by 9b35f5cf 10 years ago			`if not x and not y:`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago			`pdf_x, pdf_y = 72.0*width/ndpi[0], 72.0*height/ndpi[1]`
fix regression introduced by 9b35f5cf 10 years ago			`elif not y:`
			`pdf_x, pdf_y = x, x*height/width`
			`elif not x:`
			`pdf_x, pdf_y = y*width/height, y`
Fix for adding custom resolutions 9 years ago			`else:`
			`pdf_x = x`
			`pdf_y = y`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago
			`pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)`
major refactoring 11 years ago
			`return pdf.tostring()`
initial commit 12 years ago
Start of converting the module to a proper package. 10 years ago
			`def positive_float(string):`
			`value = float(string)`
			`if value <= 0:`
			`msg = "%r is not positive"%string`
			`raise argparse.ArgumentTypeError(msg)`
			`return value`

			`def valid_date(string):`
			`return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")`

			`parser = argparse.ArgumentParser(`
			`description='Lossless conversion/embedding of images (in)to pdf')`
			`parser.add_argument(`
Avoid leaking file descriptors This change prevents img2pdf from opening all input files at once, which means it now works with thousands of input files. 10 years ago			`'images', metavar='infile', type=str,`
Start of converting the module to a proper package. 10 years ago			`nargs='+', help='input file(s)')`
			`parser.add_argument(`
			`'-o', '--output', metavar='out', type=argparse.FileType('wb'),`
			`default=sys.stdout, help='output file (default: stdout)')`
			`parser.add_argument(`
			`'-d', '--dpi', metavar='dpi', type=positive_float,`
			`help='dpi for pdf output (default: 96.0)')`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago			`parser.add_argument(`
			`'-x', metavar='pdf_x', type=positive_float,`
			`help='output width in points')`
			`parser.add_argument(`
			`'-y', metavar='pdf_y', type=positive_float,`
			`help='output height in points')`
Start of converting the module to a proper package. 10 years ago			`parser.add_argument(`
			`'-t', '--title', metavar='title', type=str,`
			`help='title for metadata')`
			`parser.add_argument(`
			`'-a', '--author', metavar='author', type=str,`
			`help='author for metadata')`
			`parser.add_argument(`
			`'-c', '--creator', metavar='creator', type=str,`
			`help='creator for metadata')`
			`parser.add_argument(`
			`'-p', '--producer', metavar='producer', type=str,`
			`help='producer for metadata')`
			`parser.add_argument(`
			`'-r', '--creationdate', metavar='creationdate', type=valid_date,`
			`help='creation date for metadata in YYYY-MM-DDTHH:MM:SS format')`
			`parser.add_argument(`
			`'-m', '--moddate', metavar='moddate', type=valid_date,`
			`help='modification date for metadata in YYYY-MM-DDTHH:MM:SS format')`
			`parser.add_argument(`
			`'-s', '--subject', metavar='subject', type=str,`
			`help='subject for metadata')`
			`parser.add_argument(`
			`'-k', '--keywords', metavar='kw', type=str, nargs='+',`
			`help='keywords for metadata')`
			`parser.add_argument(`
			`'-C', '--colorspace', metavar='colorspace', type=str,`
Enable support for CMYK images CMYK TIFFs and JPEGs both work. CMYK JPEG2000 images have not been tested. Adobe Photoshop and some other software generate inverted CMYK JPEGs. The image is assumed to be inverted if the "Adobe" (APP14) tag is present. Images can be forced inverted with `-C "CMYK;I"`, and forced not inverted with `-C CMYK`. 9 years ago			`help='force PIL colorspace (one of: RGB, L, 1, CMYK, CMYK;I)')`
make output reproducible by sorting and --nodate option 9 years ago			`parser.add_argument(`
			`'-D', '--nodate', help='do not add timestamps', action="store_true")`
Start of converting the module to a proper package. 10 years ago			`parser.add_argument(`
			`'-v', '--verbose', help='verbose mode', action="store_true")`

			`def main(args=None):`
			`if args is None:`
			`args = sys.argv[1:]`
			`args = parser.parse_args(args)`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago
Start of converting the module to a proper package. 10 years ago			`args.output.write(`
			`convert(`
add options to specify pdf dimensions in points add options specify output pdf dimensions in points: -x width; -y height. 10 years ago			`args.images, args.dpi, args.x, args.y, args.title, args.author,`
Start of converting the module to a proper package. 10 years ago			`args.creator, args.producer, args.creationdate, args.moddate,`
			`args.subject, args.keywords, args.colorspace, args.verbose))`
allow running src/img2pdf.py standalone 10 years ago
			`if __name__ == '__main__':`
			`main()`