img2pdf/src/img2pdf.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

import sys
import os
import zlib
import argparse
from PIL import Image
from datetime import datetime
from jp2 import parsejp2
from enum import Enum
from io import BytesIO
import logging
import struct

PY3 = sys.version_info[0] >= 3

__version__ = "0.3.0"
default_dpi = 96.0
papersizes = {
    "letter": "8.5inx11in",
    "a0":     "841mmx1189mm",
    "a1":     "594mmx841mm",
    "a2":     "420mmx594mm",
    "a3":     "297mmx420mm",
    "a4":     "210mmx297mm",
    "a5":     "148mmx210mm",
    "a6":     "105mmx148mm",
}
papernames = {
    "letter": "Letter",
    "a0":     "A0",
    "a1":     "A1",
    "a2":     "A2",
    "a3":     "A3",
    "a4":     "A4",
    "a5":     "A5",
    "a6":     "A6",
}


FitMode = Enum('FitMode', 'into fill exact shrink enlarge')

PageOrientation = Enum('PageOrientation', 'portrait landscape')

Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other')

ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG other')

PageMode = Enum('PageMode', 'none outlines thumbs')

PageLayout = Enum('PageLayout',
                  'single onecolumn twocolumnright twocolumnleft')

Magnification = Enum('Magnification', 'fit fith fitbh')

ImgSize = Enum('ImgSize', 'abs perc dpi')

Unit = Enum('Unit', 'pt cm mm inch')

ImgUnit = Enum('ImgUnit', 'pt cm mm inch perc dpi')


class NegativeDimensionError(Exception):
    pass


class UnsupportedColorspaceError(Exception):
    pass


class ImageOpenError(Exception):
    pass


class JpegColorspaceError(Exception):
    pass


class PdfTooLargeError(Exception):
    pass


# without pdfrw this function is a no-op
def my_convert_load(string):
    return string


def parse(cont, indent=1):
    if type(cont) is dict:
        return b"<<\n"+b"\n".join(
            [4 * indent * b" " + k + b" " + parse(v, indent+1)
             for k, v in sorted(cont.items())])+b"\n"+4*(indent-1)*b" "+b">>"
    elif type(cont) is int:
        return str(cont).encode()
    elif type(cont) is float:
        if int(cont) == cont:
            return parse(int(cont))
        else:
            return ("%0.4f" % cont).rstrip("0").encode()
    elif isinstance(cont, MyPdfDict):
        # if cont got an identifier, then addobj() has been called with it
        # and a link to it will be added, otherwise add it inline
        if hasattr(cont, "identifier"):
            return ("%d 0 R" % cont.identifier).encode()
        else:
            return parse(cont.content, indent)
    elif type(cont) is str or isinstance(cont, bytes):
        if type(cont) is str and type(cont) is not bytes:
            raise TypeError(
                "parse must be passed a bytes object in py3. Got: %s" % cont)
        return cont
    elif isinstance(cont, list):
        return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"
    else:
        raise TypeError("cannot handle type %s with content %s" % (type(cont),
                                                                   cont))


class MyPdfDict(object):
    def __init__(self, *args, **kw):
        self.content = dict()
        if args:
            if len(args) == 1:
                args = args[0]
            self.content.update(args)
        self.stream = None
        for key, value in kw.items():
            if key == "stream":
                self.stream = value
                self.content[MyPdfName.Length] = len(value)
            elif key == "indirect":
                pass
            else:
                self.content[getattr(MyPdfName, key)] = value

    def tostring(self):
        if self.stream is not None:
            return (
                ("%d 0 obj\n" % self.identifier).encode() +
                parse(self.content) +
                b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")
        else:
            return ("%d 0 obj\n" % self.identifier).encode() + \
                   parse(self.content) + b"\nendobj\n"

    def __setitem__(self, key, value):
        self.content[key] = value

    def __getitem__(self, key):
        return self.content[key]


class MyPdfName():
    def __getattr__(self, name):
        return b'/' + name.encode('ascii')


MyPdfName = MyPdfName()


class MyPdfObject(bytes):
    def __new__(cls, string):
        return bytes.__new__(cls, string.encode('ascii'))


class MyPdfArray(list):
    pass


class MyPdfWriter():
    def __init__(self, version="1.3"):
        self.objects = []
        # create an incomplete pages object so that a /Parent entry can be
        # added to each page
        self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
        self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
        self.version = version  # default pdf version 1.3
        self.pagearray = []

    def addobj(self, obj):
        newid = len(self.objects)+1
        obj.identifier = newid
        self.objects.append(obj)

    def tostream(self, info, stream):
        xreftable = list()

        # justification of the random binary garbage in the header from
        # adobe:
        #
        #  > Note: If a PDF file contains binary data, as most do (see Section
        #  > 3.1, “Lexical Conventions”), it is recommended that the header
        #  > line be immediately followed by a comment line containing at
        #  > least four binary characters—that is, characters whose codes are
        #  > 128 or greater. This ensures proper behavior of file transfer
        #  > applications that inspect data near the beginning of a file to
        #  > determine whether to treat the file’s contents as text or as
        #  > binary.
        #
        # the choice of binary characters is arbitrary but those four seem to
        # be used elsewhere.
        pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii')
        pdfheader += b'%\xe2\xe3\xcf\xd3\n'
        stream.write(pdfheader)

        # From section 3.4.3 of the PDF Reference (version 1.7):
        #
        #  > Each entry is exactly 20 bytes long, including the end-of-line
        #  > marker.
        #  >
        #  > [...]
        #  >
        #  > The format of an in-use entry is
        #  > nnnnnnnnnn ggggg n eol
        #  > where
        #  > nnnnnnnnnn is a 10-digit byte offset
        #  > ggggg is a 5-digit generation number
        #  > n is a literal keyword identifying this as an in-use entry
        #  > eol is a 2-character end-of-line sequence
        #  >
        #  > [...]
        #  >
        #  > If the file’s end-of-line marker is a single character (either a
        #  > carriage return or a line feed), it is preceded by a single space;
        #
        # Since we chose to use a single character eol marker, we precede it by
        # a space
        pos = len(pdfheader)
        xreftable.append(b"0000000000 65535 f \n")
        for o in self.objects:
            xreftable.append(("%010d 00000 n \n" % pos).encode())
            content = o.tostring()
            stream.write(content)
            pos += len(content)

        xrefoffset = pos
        stream.write(b"xref\n")
        stream.write(("0 %d\n" % len(xreftable)).encode())
        for x in xreftable:
            stream.write(x)
        stream.write(b"trailer\n")
        stream.write(parse({b"/Size": len(xreftable), b"/Info": info,
                            b"/Root": self.catalog})+b"\n")
        stream.write(b"startxref\n")
        stream.write(("%d\n" % xrefoffset).encode())
        stream.write(b"%%EOF\n")
        return

    def addpage(self, page):
        page[b"/Parent"] = self.pages
        self.pagearray.append(page)
        self.pages.content[b"/Kids"].append(page)
        self.pages.content[b"/Count"] += 1
        self.addobj(page)


if PY3:
    class MyPdfString():
        @classmethod
        def encode(cls, string, hextype=False):
            if hextype:
                return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >'
            else:
                try:
                    string = string.encode('ascii')
                except UnicodeEncodeError:
                    string = b"\xfe\xff"+string.encode("utf-16-be")
                string = string.replace(b'\\', b'\\\\')
                string = string.replace(b'(', b'\\(')
                string = string.replace(b')', b'\\)')
                return b'(' + string + b')'
else:
    class MyPdfString(object):
        @classmethod
        def encode(cls, string, hextype=False):
            if hextype:
                return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >'
            else:
                # This mimics exactely to what pdfrw does.
                string = string.replace(b'\\', b'\\\\')
                string = string.replace(b'(', b'\\(')
                string = string.replace(b')', b'\\)')
                return b'(' + string + b')'


class pdfdoc(object):
    def __init__(self, version="1.3", title=None, author=None, creator=None,
                 producer=None, creationdate=None, moddate=None, subject=None,
                 keywords=None, nodate=False, panes=None, initial_page=None,
                 magnification=None, page_layout=None, fit_window=False,
                 center_window=False, fullscreen=False, with_pdfrw=True):
        if with_pdfrw:
            try:
                from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
                self.with_pdfrw = True
            except ImportError:
                PdfWriter = MyPdfWriter
                PdfDict = MyPdfDict
                PdfName = MyPdfName
                PdfString = MyPdfString
                self.with_pdfrw = False
        else:
            PdfWriter = MyPdfWriter
            PdfDict = MyPdfDict
            PdfName = MyPdfName
            PdfString = MyPdfString
            self.with_pdfrw = False

        now = datetime.now()
        self.info = PdfDict(indirect=True)

        def datetime_to_pdfdate(dt):
            return dt.strftime("%Y%m%d%H%M%SZ")

        if title is not None:
            self.info[PdfName.Title] = PdfString.encode(title)
        if author is not None:
            self.info[PdfName.Author] = PdfString.encode(author)
        if creator is not None:
            self.info[PdfName.Creator] = PdfString.encode(creator)
        if producer is not None and producer != "":
            self.info[PdfName.Producer] = PdfString.encode(producer)
        if creationdate is not None:
            self.info[PdfName.CreationDate] = \
                PdfString.encode("D:"+datetime_to_pdfdate(creationdate))
        elif not nodate:
            self.info[PdfName.CreationDate] = \
                PdfString.encode("D:"+datetime_to_pdfdate(now))
        if moddate is not None:
            self.info[PdfName.ModDate] = \
                PdfString.encode("D:"+datetime_to_pdfdate(moddate))
        elif not nodate:
            self.info[PdfName.ModDate] = PdfString.encode(
                    "D:"+datetime_to_pdfdate(now))
        if subject is not None:
            self.info[PdfName.Subject] = PdfString.encode(subject)
        if keywords is not None:
            self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords))

        self.writer = PdfWriter()
        self.writer.version = version
        # this is done because pdfrw adds info, catalog and pages as the first
        # three objects in this order
        if not self.with_pdfrw:
            self.writer.addobj(self.info)
            self.writer.addobj(self.writer.catalog)
            self.writer.addobj(self.writer.pages)

        self.panes = panes
        self.initial_page = initial_page
        self.magnification = magnification
        self.page_layout = page_layout
        self.fit_window = fit_window
        self.center_window = center_window
        self.fullscreen = fullscreen

    def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata,
                      imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
                      pageheight, userunit=None, palette=None):
        if self.with_pdfrw:
            from pdfrw import PdfDict, PdfName, PdfObject, PdfString
            from pdfrw.py23_diffs import convert_load
        else:
            PdfDict = MyPdfDict
            PdfName = MyPdfName
            PdfObject = MyPdfObject
            PdfString = MyPdfString
            convert_load = my_convert_load

        if color == Colorspace['1'] or color == Colorspace.L:
            colorspace = PdfName.DeviceGray
        elif color == Colorspace.RGB:
            colorspace = PdfName.DeviceRGB
        elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']:
            colorspace = PdfName.DeviceCMYK
        elif color == Colorspace.P:
            if self.with_pdfrw:
                raise Exception("pdfrw does not support hex strings for palette image input, re-run with --without-pdfrw")
            colorspace = [ PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, PdfString.encode(palette, hextype=True)]
        else:
            raise UnsupportedColorspaceError("unsupported color space: %s"
                                             % color.name)

        # either embed the whole jpeg or deflate the bitmap representation
        if imgformat is ImageFormat.JPEG:
            ofilter = PdfName.DCTDecode
        elif imgformat is ImageFormat.JPEG2000:
            ofilter = PdfName.JPXDecode
            self.writer.version = "1.5"  # jpeg2000 needs pdf 1.5
        elif imgformat is ImageFormat.CCITTGroup4:
            ofilter = [PdfName.CCITTFaxDecode]
        else:
            ofilter = PdfName.FlateDecode

        image = PdfDict(stream=convert_load(imgdata))

        image[PdfName.Type] = PdfName.XObject
        image[PdfName.Subtype] = PdfName.Image
        image[PdfName.Filter] = ofilter
        image[PdfName.Width] = imgwidthpx
        image[PdfName.Height] = imgheightpx
        image[PdfName.ColorSpace] = colorspace
        # hardcoded as PIL doesn't provide bits for non-jpeg formats
        if imgformat is ImageFormat.CCITTGroup4:
            image[PdfName.BitsPerComponent] = 1
        else:
            if color == Colorspace['1']:
                image[PdfName.BitsPerComponent] = 1
            elif color == Colorspace.P:
                if len(palette) <= 2**1:
                    image[PdfName.BitsPerComponent] = 1
                elif len(palette) <= 2**4:
                    image[PdfName.BitsPerComponent] = 4
                else:
                    image[PdfName.BitsPerComponent] = 8
            else:
                image[PdfName.BitsPerComponent] = 8

        if color == Colorspace['CMYK;I']:
            # Inverts all four channels
            image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]

        if imgformat is ImageFormat.CCITTGroup4:
            decodeparms = PdfDict()
            decodeparms[PdfName.K] = -1
            decodeparms[PdfName.BlackIs1] = PdfObject('true')
            decodeparms[PdfName.Columns] = imgwidthpx
            decodeparms[PdfName.Rows] = imgheightpx
            image[PdfName.DecodeParms] = [decodeparms]
        elif imgformat is ImageFormat.PNG:
            decodeparms = PdfDict()
            decodeparms[PdfName.Predictor] = 15
            if color in [ Colorspace.P, Colorspace['1'], Colorspace.L ]:
                decodeparms[PdfName.Colors] = 1
            else:
                decodeparms[PdfName.Colors] = 3
            decodeparms[PdfName.Columns] = imgwidthpx
            if color == Colorspace['1']:
                decodeparms[PdfName.BitsPerComponent] = 1
            elif color == Colorspace.P:
                if len(palette) <= 2**1:
                    decodeparms[PdfName.BitsPerComponent] = 1
                elif len(palette) <= 2**4:
                    decodeparms[PdfName.BitsPerComponent] = 4
                else:
                    decodeparms[PdfName.BitsPerComponent] = 8
            else:
                decodeparms[PdfName.BitsPerComponent] = 8
            image[PdfName.DecodeParms] = decodeparms

        text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" %
                (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii")

        content = PdfDict(stream=convert_load(text))
        resources = PdfDict(XObject=PdfDict(Im0=image))

        page = PdfDict(indirect=True)
        page[PdfName.Type] = PdfName.Page
        page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
        page[PdfName.Resources] = resources
        page[PdfName.Contents] = content
        if userunit is not None:
            # /UserUnit requires PDF 1.6
            if self.writer.version < '1.6':
                self.writer.version = '1.6'
            page[PdfName.UserUnit] = userunit

        self.writer.addpage(page)

        if not self.with_pdfrw:
            self.writer.addobj(content)
            self.writer.addobj(image)

    def tostring(self):
        stream = BytesIO()
        self.tostream(stream)
        return stream.getvalue()

    def tostream(self, outputstream):
        if self.with_pdfrw:
            from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
        else:
            PdfDict = MyPdfDict
            PdfName = MyPdfName
            PdfObject = MyPdfObject
            PdfArray = MyPdfArray
        NullObject = PdfObject('null')
        TrueObject = PdfObject('true')

        # We fill the catalog with more information like /ViewerPreferences,
        # /PageMode, /PageLayout or /OpenAction because the latter refers to a
        # page object which has to be present so that we can get its id.
        #
        # Furthermore, if using pdfrw, the trailer is cleared every time a page
        # is added, so we can only start using it after all pages have been
        # written.

        if self.with_pdfrw:
            catalog = self.writer.trailer.Root
        else:
            catalog = self.writer.catalog

        if self.fullscreen or self.fit_window or self.center_window or \
                self.panes is not None:
            catalog[PdfName.ViewerPreferences] = PdfDict()

        if self.fullscreen:
            # this setting might be overwritten later by the page mode
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseNone

        if self.panes == PageMode.thumbs:
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseThumbs
            # this setting might be overwritten later if fullscreen
            catalog[PdfName.PageMode] = PdfName.UseThumbs
        elif self.panes == PageMode.outlines:
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseOutlines
            # this setting might be overwritten later if fullscreen
            catalog[PdfName.PageMode] = PdfName.UseOutlines
        elif self.panes in [PageMode.none, None]:
            pass
        else:
            raise ValueError("unknown page mode: %s" % self.panes)

        if self.fit_window:
            catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject

        if self.center_window:
            catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = \
                    TrueObject

        if self.fullscreen:
            catalog[PdfName.PageMode] = PdfName.FullScreen

        # see table 8.2 in section 8.2.1 in
        # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf
        # Fit - Fits the page to the window.
        # FitH - Fits the width of the page to the window.
        # FitV - Fits the height of the page to the window.
        # FitR - Fits the rectangle specified by the four coordinates to the
        #        window.
        # FitB - Fits the page bounding box to the window. This basically
        #        reduces the amount of whitespace (margins) that is displayed
        #        and thus focussing more on the text content.
        # FitBH - Fits the width of the page bounding box to the window.
        # FitBV - Fits the height of the page bounding box to the window.

        # by default the initial page is the first one
        initial_page = self.writer.pagearray[0]
        # we set the open action here to make sure we open on the requested
        # initial page but this value might be overwritten by a custom open
        # action later while still taking the requested initial page into
        # account
        if self.initial_page is not None:
            initial_page = self.writer.pagearray[self.initial_page - 1]
            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.XYZ,
                                                    NullObject, NullObject, 0])

        if self.magnification == Magnification.fit:
            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
        elif self.magnification == Magnification.fith:
            pagewidth = initial_page[PdfName.MediaBox][2]
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.FitH, pagewidth])
        elif self.magnification == Magnification.fitbh:
            # quick hack to determine the image width on the page
            imgwidth = float(initial_page[PdfName.Contents].stream.split()[4])
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.FitBH, imgwidth])
        elif isinstance(self.magnification, float):
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.XYZ, NullObject, NullObject,
                 self.magnification])
        elif self.magnification is None:
            pass
        else:
            raise ValueError("unknown magnification: %s" % self.magnification)

        if self.page_layout == PageLayout.single:
            catalog[PdfName.PageLayout] = PdfName.SinglePage
        elif self.page_layout == PageLayout.onecolumn:
            catalog[PdfName.PageLayout] = PdfName.OneColumn
        elif self.page_layout == PageLayout.twocolumnright:
            catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
        elif self.page_layout == PageLayout.twocolumnleft:
            catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
        elif self.page_layout is None:
            pass
        else:
            raise ValueError("unknown page layout: %s" % self.page_layout)

        # now write out the PDF
        if self.with_pdfrw:
            self.writer.trailer.Info = self.info
            self.writer.write(outputstream)
        else:
            self.writer.tostream(self.info, outputstream)


def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
    if imgformat == ImageFormat.JPEG2000 \
            and rawdata is not None and imgdata is None:
        # this codepath gets called if the PIL installation is not able to
        # handle JPEG2000 files
        imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)

        if hdpi is None:
            hdpi = default_dpi
        if vdpi is None:
            vdpi = default_dpi
        ndpi = (hdpi, vdpi)
    else:
        imgwidthpx, imgheightpx = imgdata.size

        ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
        # In python3, the returned dpi value for some tiff images will
        # not be an integer but a float. To make the behaviour of
        # img2pdf the same between python2 and python3, we convert that
        # float into an integer by rounding.
        # Search online for the 72.009 dpi problem for more info.
        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
        ics = imgdata.mode

    if ics in ["LA", "PA", "RGBA"]:
        logging.warning("Image contains transparency which cannot be retained in PDF.")
        logging.warning("img2pdf will not perform a lossy operation.")
        logging.warning("You can remove the alpha channel using imagemagick:")
        logging.warning("  $ convert input.png -background white -alpha remove -alpha off output.png")
        raise Exception("Refusing to work on images with alpha channel")


    # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
    # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
    # header. In that case it can happen that the horizontal and vertical DPI
    # are set to zero.
    if ndpi == (0, 0):
        ndpi = (default_dpi, default_dpi)

    logging.debug("input dpi = %d x %d", *ndpi)

    if colorspace:
        color = colorspace
        logging.debug("input colorspace (forced) = %s", color)
    else:
        color = None
        for c in Colorspace:
            if c.name == ics:
                color = c
        if color is None:
            color = Colorspace.other
        if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
            # Adobe inverts CMYK JPEGs for some reason, and others
            # have followed suit as well. Some software assumes the
            # JPEG is inverted if the Adobe tag (APP14), while other
            # software assumes all CMYK JPEGs are inverted. I don't
            # have enough experience with these to know which is
            # better for images currently in the wild, so I'm going
            # with the first approach for now.
            if "adobe" in imgdata.info:
                color = Colorspace['CMYK;I']
        logging.debug("input colorspace = %s", color.name)

    logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)

    return (color, ndpi, imgwidthpx, imgheightpx)


def transcode_monochrome(imgdata):
    """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""

    from PIL import TiffImagePlugin

    logging.debug("Converting monochrome to CCITT Group4")

    # Convert the image to Group 4 in memory. If libtiff is not installed and
    # Pillow is not compiled against it, .save() will raise an exception.
    newimgio = BytesIO()

    # we create a whole new PIL image or otherwise it might happen with some
    # input images, that libtiff fails an assert and the whole process is
    # killed by a SIGABRT:
    #   https://gitlab.mister-muffin.de/josch/img2pdf/issues/46
    im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes())
    im.save(newimgio, format='TIFF', compression='group4')

    # Open new image in memory
    newimgio.seek(0)
    newimg = Image.open(newimgio)

    # If Pillow is passed an invalid compression argument it will ignore it;
    # make sure the image actually got compressed.
    if newimg.info['compression'] != 'group4':
        raise ValueError("Image not compressed as expected")

    # Read the TIFF tags to find the offset(s) of the compressed data strips.
    strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
    strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
    rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP]

    # PIL always seems to create a single strip even for very large TIFFs when
    # it saves images, so assume we only have to read a single strip.
    # A test ~10 GPixel image was still encoded as a single strip. Just to be
    # safe check throw an error if there is more than one offset.
    if len(strip_offsets) > 1:
        raise NotImplementedError("Transcoding multiple strips not supported")

    newimgio.seek(strip_offsets[0])
    ccittdata = newimgio.read(strip_bytes[0])

    return ccittdata

def parse_png(rawdata):
    pngidat = b""
    palette = []
    i = 16
    while i < len(rawdata):
        # once we can require Python >= 3.2 we can use int.from_bytes() instead
        n, = struct.unpack('>I', rawdata[i-8:i-4])
        if i + n > len(rawdata):
            raise Exception("invalid png: %d %d %d"%(i, n, len(rawdata)))
        if rawdata[i-4:i] == b"IDAT":
            pngidat += rawdata[i:i+n]
        elif rawdata[i-4:i] == b"PLTE":
            for j in range(i, i+n, 3):
                # with int.from_bytes() we would not have to prepend extra zeroes
                color, = struct.unpack('>I', b'\x00'+rawdata[j:j+3])
                palette.append(color)
        i += n
        i += 12
    return pngidat, palette

def read_images(rawdata, colorspace, first_frame_only=False):
    im = BytesIO(rawdata)
    im.seek(0)
    imgdata = None
    try:
        imgdata = Image.open(im)
    except IOError as e:
        # test if it is a jpeg2000 image
        if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
            raise ImageOpenError("cannot read input image (not jpeg2000). "
                                 "PIL: error reading image: %s" % e)
        # image is jpeg2000
        imgformat = ImageFormat.JPEG2000
    else:
        imgformat = None
        for f in ImageFormat:
            if f.name == imgdata.format:
                imgformat = f
        if imgformat is None:
            imgformat = ImageFormat.other

    logging.debug("imgformat = %s", imgformat.name)

    # depending on the input format, determine whether to pass the raw
    # image or the zlib compressed color information

    # JPEG and JPEG2000 can be embedded into the PDF as-is
    if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                imgdata, imgformat, default_dpi, colorspace, rawdata)
        if color == Colorspace['1']:
            raise JpegColorspaceError("jpeg can't be monochrome")
        if color == Colorspace['P']:
            raise JpegColorspaceError("jpeg can't have a color palette")
        if color == Colorspace['RGBA']:
            raise JpegColorspaceError("jpeg can't have an alpha channel")
        im.close()
        return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])]

    # We can directly embed the IDAT chunk of PNG images if the PNG is not
    # interlaced
    #
    # PIL does not provide the information whether a PNG was stored interlaced
    # or not. Thus, we retrieve that info manually by looking at byte 13 in the
    # IHDR chunk. We know where to find that in the file because the IHDR chunk
    # must be the first chunk.
    if imgformat == ImageFormat.PNG and rawdata[28] == 0:
        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                imgdata, imgformat, default_dpi, colorspace, rawdata)
        pngidat, palette = parse_png(rawdata)
        return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)]

    # Everything else has to be encoded

    result = []
    img_page_count = 0
    # loop through all frames of the image (example: multipage TIFF)
    while True:
        try:
            imgdata.seek(img_page_count)
        except EOFError:
            break

        if first_frame_only and img_page_count > 0:
            break

        logging.debug("Converting frame: %d" % img_page_count)

        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                imgdata, imgformat, default_dpi, colorspace)

        newimg = None
        if color == Colorspace['1']:
            try:
                ccittdata = transcode_monochrome(imgdata)
                imgformat = ImageFormat.CCITTGroup4
                result.append((color, ndpi, imgformat, ccittdata,
                               imgwidthpx, imgheightpx, []))
                img_page_count += 1
                continue
            except Exception as e:
                logging.debug(e)
                logging.debug("Converting colorspace 1 to L")
                newimg = imgdata.convert('L')
                color = Colorspace.L
        elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
                       Colorspace["CMYK;I"], Colorspace.P]:
            logging.debug("Colorspace is OK: %s", color)
            newimg = imgdata
        else:
            raise ValueError("unknown or unsupported colorspace: %s" % color.name)
        # the PNG format does not support CMYK, so we fall back to normal
        # compression
        if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]:
            imggz = zlib.compress(newimg.tobytes())
            result.append((color, ndpi, imgformat, imggz, imgwidthpx,
                           imgheightpx, []))
        else:
            # cheapo version to retrieve a PNG encoding of the payload is to
            # just save it with PIL. In the future this could be replaced by
            # dedicated function applying the Paeth PNG filter to the raw pixel
            pngbuffer = BytesIO()
            newimg.save(pngbuffer, format="png")
            pngidat, palette = parse_png(pngbuffer.getvalue())
            imgformat = ImageFormat.PNG
            result.append((color, ndpi, imgformat, pngidat, imgwidthpx,
                           imgheightpx, palette))
        img_page_count += 1
    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
    # close() method
    try:
        imgdata.close()
    except AttributeError:
        pass
    im.close()
    return result


# converts a length in pixels to a length in PDF units (1/72 of an inch)
def px_to_pt(length, dpi):
    return 72.0*length/dpi


def cm_to_pt(length):
    return (72.0*length)/2.54


def mm_to_pt(length):
    return (72.0*length)/25.4


def in_to_pt(length):
    return 72.0*length


def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None,
                   auto_orient=False):
    def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
        if fitwidth is None and fitheight is None:
            raise ValueError("fitwidth and fitheight cannot both be None")
        # if fit is fill or enlarge then it is okay if one of the dimensions
        # are negative but one of them must still be positive
        # if fit is not fill or enlarge then both dimensions must be positive
        if fit in [FitMode.fill, FitMode.enlarge] and \
                fitwidth is not None and fitwidth < 0 and \
                fitheight is not None and fitheight < 0:
            raise ValueError("cannot fit into a rectangle where both "
                             "dimensions are negative")
        elif fit not in [FitMode.fill, FitMode.enlarge] and \
                ((fitwidth is not None and fitwidth < 0) or
                    (fitheight is not None and fitheight < 0)):
            raise Exception("cannot fit into a rectangle where either "
                            "dimensions are negative")

        def default():
            if fitwidth is not None and fitheight is not None:
                newimgwidth = fitwidth
                newimgheight = (newimgwidth * imgheight)/imgwidth
                if newimgheight > fitheight:
                    newimgheight = fitheight
                    newimgwidth = (newimgheight * imgwidth)/imgheight
            elif fitwidth is None and fitheight is not None:
                newimgheight = fitheight
                newimgwidth = (newimgheight * imgwidth)/imgheight
            elif fitheight is None and fitwidth is not None:
                newimgwidth = fitwidth
                newimgheight = (newimgwidth * imgheight)/imgwidth
            else:
                raise ValueError("fitwidth and fitheight cannot both be None")
            return newimgwidth, newimgheight
        if fit is None or fit == FitMode.into:
            return default()
        elif fit == FitMode.fill:
            if fitwidth is not None and fitheight is not None:
                newimgwidth = fitwidth
                newimgheight = (newimgwidth * imgheight)/imgwidth
                if newimgheight < fitheight:
                    newimgheight = fitheight
                    newimgwidth = (newimgheight * imgwidth)/imgheight
            elif fitwidth is None and fitheight is not None:
                newimgheight = fitheight
                newimgwidth = (newimgheight * imgwidth)/imgheight
            elif fitheight is None and fitwidth is not None:
                newimgwidth = fitwidth
                newimgheight = (newimgwidth * imgheight)/imgwidth
            else:
                raise ValueError("fitwidth and fitheight cannot both be None")
            return newimgwidth, newimgheight
        elif fit == FitMode.exact:
            if fitwidth is not None and fitheight is not None:
                return fitwidth, fitheight
            elif fitwidth is None and fitheight is not None:
                newimgheight = fitheight
                newimgwidth = (newimgheight * imgwidth)/imgheight
            elif fitheight is None and fitwidth is not None:
                newimgwidth = fitwidth
                newimgheight = (newimgwidth * imgheight)/imgwidth
            else:
                raise ValueError("fitwidth and fitheight cannot both be None")
            return newimgwidth, newimgheight
        elif fit == FitMode.shrink:
            if fitwidth is not None and fitheight is not None:
                if imgwidth <= fitwidth and imgheight <= fitheight:
                    return imgwidth, imgheight
            elif fitwidth is None and fitheight is not None:
                if imgheight <= fitheight:
                    return imgwidth, imgheight
            elif fitheight is None and fitwidth is not None:
                if imgwidth <= fitwidth:
                    return imgwidth, imgheight
            else:
                raise ValueError("fitwidth and fitheight cannot both be None")
            return default()
        elif fit == FitMode.enlarge:
            if fitwidth is not None and fitheight is not None:
                if imgwidth > fitwidth or imgheight > fitheight:
                    return imgwidth, imgheight
            elif fitwidth is None and fitheight is not None:
                if imgheight > fitheight:
                    return imgwidth, imgheight
            elif fitheight is None and fitwidth is not None:
                if imgwidth > fitwidth:
                    return imgwidth, imgheight
            else:
                raise ValueError("fitwidth and fitheight cannot both be None")
            return default()
        else:
            raise NotImplementedError
    # if no layout arguments are given, then the image size is equal to the
    # page size and will be drawn with the default dpi
    if pagesize is None and imgsize is None and border is None:
        return default_layout_fun
    if pagesize is None and imgsize is None and border is not None:
        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
            imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
            pagewidth = imgwidthpdf+2*border[1]
            pageheight = imgheightpdf+2*border[0]
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
        return layout_fun
    if border is None:
        border = (0, 0)
    # if the pagesize is given but the imagesize is not, then the imagesize
    # will be calculated from the pagesize, taking into account the border
    # and the fitting
    if pagesize is not None and imgsize is None:
        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            if pagesize[0] is not None and pagesize[1] is not None and \
                    auto_orient and \
                    ((imgwidthpx > imgheightpx and
                     pagesize[0] < pagesize[1]) or
                     (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
                pagewidth, pageheight = pagesize[1], pagesize[0]
                newborder = border[1], border[0]
            else:
                pagewidth, pageheight = pagesize[0], pagesize[1]
                newborder = border
            if pagewidth is not None:
                fitwidth = pagewidth-2*newborder[1]
            else:
                fitwidth = None
            if pageheight is not None:
                fitheight = pageheight-2*newborder[0]
            else:
                fitheight = None
            if fit in [FitMode.fill, FitMode.enlarge] and \
                    fitwidth is not None and fitwidth < 0 and \
                    fitheight is not None and fitheight < 0:
                raise NegativeDimensionError(
                    "at least one border dimension musts be smaller than half "
                    "the respective page dimension")
            elif fit not in [FitMode.fill, FitMode.enlarge] \
                    and ((fitwidth is not None and fitwidth < 0) or
                         (fitheight is not None and fitheight < 0)):
                raise NegativeDimensionError(
                    "one border dimension is larger than half of the "
                    "respective page dimension")
            imgwidthpdf, imgheightpdf = \
                fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
                       px_to_pt(imgheightpx, ndpi[1]),
                       fitwidth, fitheight)
            if pagewidth is None:
                pagewidth = imgwidthpdf+border[1]*2
            if pageheight is None:
                pageheight = imgheightpdf+border[0]*2
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
        return layout_fun

    def scale_imgsize(s, px, dpi):
        if s is None:
            return None
        mode, value = s
        if mode == ImgSize.abs:
            return value
        if mode == ImgSize.perc:
            return (px_to_pt(px, dpi)*value)/100
        if mode == ImgSize.dpi:
            return px_to_pt(px, value)
        raise NotImplementedError
    if pagesize is None and imgsize is not None:
        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            imgwidthpdf, imgheightpdf = \
                    fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
                           px_to_pt(imgheightpx, ndpi[1]),
                           scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
                           scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
            pagewidth = imgwidthpdf+2*border[1]
            pageheight = imgheightpdf+2*border[0]
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
        return layout_fun
    if pagesize is not None and imgsize is not None:
        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            if pagesize[0] is not None and pagesize[1] is not None and \
                    auto_orient and \
                    ((imgwidthpx > imgheightpx and
                      pagesize[0] < pagesize[1]) or
                     (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
                pagewidth, pageheight = pagesize[1], pagesize[0]
            else:
                pagewidth, pageheight = pagesize[0], pagesize[1]
            imgwidthpdf, imgheightpdf = \
                fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
                       px_to_pt(imgheightpx, ndpi[1]),
                       scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
                       scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
        return layout_fun
    raise NotImplementedError


def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
    imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0])
    imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1])
    return pagewidth, pageheight, imgwidthpdf, imgheightpdf


def get_fixed_dpi_layout_fun(fixed_dpi):
    """Layout function that overrides whatever DPI is claimed in input images.

    >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
    >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
    """
    def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
        return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)
    return fixed_dpi_layout_fun


def find_scale(pagewidth, pageheight):
    """Find the power of 10 (10, 100, 1000...) that will reduce the scale
    below the PDF specification limit of 14400 PDF units (=200 inches)"""
    from math import log10, ceil

    major = max(pagewidth, pageheight)
    oversized = major / 14400.0

    return 10 ** ceil(log10(oversized))


# given one or more input image, depending on outputstream, either return a
# string containing the whole PDF if outputstream is None or write the PDF
# data to the given file-like object and return None
#
# Input images can be given as file like objects (they must implement read()),
# as a binary string representing the image content or as filenames to the
# images.
def convert(*images, **kwargs):

    _default_kwargs = dict(
        title=None,
        author=None, creator=None, producer=None, creationdate=None,
        moddate=None, subject=None, keywords=None, colorspace=None,
        nodate=False, layout_fun=default_layout_fun, viewer_panes=None,
        viewer_initial_page=None, viewer_magnification=None,
        viewer_page_layout=None, viewer_fit_window=False,
        viewer_center_window=False, viewer_fullscreen=False,
        with_pdfrw=True, outputstream=None, first_frame_only=False,
        allow_oversized=True)
    for kwname, default in _default_kwargs.items():
        if kwname not in kwargs:
            kwargs[kwname] = default

    pdf = pdfdoc(
        "1.3",
        kwargs['title'], kwargs['author'], kwargs['creator'],
        kwargs['producer'], kwargs['creationdate'], kwargs['moddate'],
        kwargs['subject'], kwargs['keywords'], kwargs['nodate'],
        kwargs['viewer_panes'], kwargs['viewer_initial_page'],
        kwargs['viewer_magnification'], kwargs['viewer_page_layout'],
        kwargs['viewer_fit_window'], kwargs['viewer_center_window'],
        kwargs['viewer_fullscreen'], kwargs['with_pdfrw'])

    # backwards compatibility with older img2pdf versions where the first
    # argument to the function had to be given as a list
    if len(images) == 1:
        # if only one argument was given and it is a list, expand it
        if isinstance(images[0], (list, tuple)):
            images = images[0]

    if not isinstance(images, (list, tuple)):
        images = [images]

    for img in images:
        # img is allowed to be a path, a binary string representing image data
        # or a file-like object (really anything that implements read())
        try:
            rawdata = img.read()
        except AttributeError:
            if not isinstance(img, (str, bytes)):
                raise TypeError(
                        "Neither implements read() nor is str or bytes")
            # the thing doesn't have a read() function, so try if we can treat
            # it as a file name
            try:
                with open(img, "rb") as f:
                    rawdata = f.read()
            except:
                # whatever the exception is (string could contain NUL
                # characters or the path could just not exist) it's not a file
                # name so we now try treating it as raw image content
                rawdata = img

        for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, palette \
                in read_images(
                    rawdata, kwargs['colorspace'], kwargs['first_frame_only']):
            pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
                kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi)

            userunit = None
            if pagewidth < 3.00 or pageheight < 3.00:
                logging.warning("pdf width or height is below 3.00 - too "
                                "small for some viewers!")
            elif pagewidth > 14400.0 or pageheight > 14400.0:
                if kwargs['allow_oversized']:
                    userunit = find_scale(pagewidth, pageheight)
                    pagewidth /= userunit
                    pageheight /= userunit
                    imgwidthpdf /= userunit
                    imgheightpdf /= userunit
                else:
                    raise PdfTooLargeError(
                        "pdf width or height must not exceed 200 inches.")
            # the image is always centered on the page
            imgxpdf = (pagewidth - imgwidthpdf)/2.0
            imgypdf = (pageheight - imgheightpdf)/2.0
            pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
                              imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
                              imgypdf, pagewidth, pageheight, userunit, palette)

    if kwargs['outputstream']:
        pdf.tostream(kwargs['outputstream'])
        return

    return pdf.tostring()


def parse_num(num, name):
    if num == '':
        return None
    unit = None
    if num.endswith("pt"):
        unit = Unit.pt
    elif num.endswith("cm"):
        unit = Unit.cm
    elif num.endswith("mm"):
        unit = Unit.mm
    elif num.endswith("in"):
        unit = Unit.inch
    else:
        try:
            num = float(num)
        except ValueError:
            msg = "%s is not a floating point number and doesn't have a " \
                  "valid unit: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)
    if unit is None:
        unit = Unit.pt
    else:
        num = num[:-2]
        try:
            num = float(num)
        except ValueError:
            msg = "%s is not a floating point number: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)
    if unit == Unit.cm:
        num = cm_to_pt(num)
    elif unit == Unit.mm:
        num = mm_to_pt(num)
    elif unit == Unit.inch:
        num = in_to_pt(num)
    return num


def parse_imgsize_num(num, name):
    if num == '':
        return None
    unit = None
    if num.endswith("pt"):
        unit = ImgUnit.pt
    elif num.endswith("cm"):
        unit = ImgUnit.cm
    elif num.endswith("mm"):
        unit = ImgUnit.mm
    elif num.endswith("in"):
        unit = ImgUnit.inch
    elif num.endswith("dpi"):
        unit = ImgUnit.dpi
    elif num.endswith("%"):
        unit = ImgUnit.perc
    else:
        try:
            num = float(num)
        except ValueError:
            msg = "%s is not a floating point number and doesn't have a " \
                  "valid unit: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)
    if unit is None:
        unit = ImgUnit.pt
    else:
        # strip off unit from string
        if unit == ImgUnit.dpi:
            num = num[:-3]
        elif unit == ImgUnit.perc:
            num = num[:-1]
        else:
            num = num[:-2]
        try:
            num = float(num)
        except ValueError:
            msg = "%s is not a floating point number: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)
    if unit == ImgUnit.cm:
        num = (ImgSize.abs, cm_to_pt(num))
    elif unit == ImgUnit.mm:
        num = (ImgSize.abs, mm_to_pt(num))
    elif unit == ImgUnit.inch:
        num = (ImgSize.abs, in_to_pt(num))
    elif unit == ImgUnit.pt:
        num = (ImgSize.abs, num)
    elif unit == ImgUnit.dpi:
        num = (ImgSize.dpi, num)
    elif unit == ImgUnit.perc:
        num = (ImgSize.perc, num)
    return num


def parse_pagesize_rectarg(string):
    transposed = string.endswith("^T")
    if transposed:
        string = string[:-2]
    if papersizes.get(string.lower()):
        string = papersizes[string.lower()]
    if 'x' not in string:
        # if there is no separating "x" in the string, then the string is
        # interpreted as the width
        w = parse_num(string, "width")
        h = None
    else:
        w, h = string.split('x', 1)
        w = parse_num(w, "width")
        h = parse_num(h, "height")
    if transposed:
        w, h = h, w
    if w is None and h is None:
        raise argparse.ArgumentTypeError("at least one dimension must be "
                                         "specified")
    return w, h


def parse_imgsize_rectarg(string):
    transposed = string.endswith("^T")
    if transposed:
        string = string[:-2]
    if papersizes.get(string.lower()):
        string = papersizes[string.lower()]
    if 'x' not in string:
        # if there is no separating "x" in the string, then the string is
        # interpreted as the width
        w = parse_imgsize_num(string, "width")
        h = None
    else:
        w, h = string.split('x', 1)
        w = parse_imgsize_num(w, "width")
        h = parse_imgsize_num(h, "height")
    if transposed:
        w, h = h, w
    if w is None and h is None:
        raise argparse.ArgumentTypeError("at least one dimension must be "
                                         "specified")
    return w, h


def parse_colorspacearg(string):
    for c in Colorspace:
        if c.name == string:
            return c
    allowed = ", ".join([c.name for c in Colorspace])
    raise argparse.ArgumentTypeError("Unsupported colorspace: %s. Must be one "
                                     "of: %s." % (string, allowed))


def parse_borderarg(string):
    if ':' in string:
        h, v = string.split(':', 1)
        if h == '':
            raise argparse.ArgumentTypeError("missing value before colon")
        if v == '':
            raise argparse.ArgumentTypeError("missing value after colon")
    else:
        if string == '':
            raise argparse.ArgumentTypeError("border option cannot be empty")
        h, v = string, string
    h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border")
    if h is None and v is None:
        raise argparse.ArgumentTypeError("missing value")
    return h, v


def input_images(path):
    if path == '-':
        # we slurp in all data from stdin because we need to seek in it later
        result = sys.stdin.buffer.read()
        if len(result) == 0:
            raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
    else:
        try:
            if os.path.getsize(path) == 0:
                raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
            # test-read a byte from it so that we can abort early in case
            # we cannot read data from the file
            with open(path, "rb") as im:
                im.read(1)
        except IsADirectoryError:
            raise argparse.ArgumentTypeError(
                "\"%s\" is a directory" % path)
        except PermissionError:
            raise argparse.ArgumentTypeError(
                "\"%s\" permission denied" % path)
        except FileNotFoundError:
            raise argparse.ArgumentTypeError(
                "\"%s\" does not exist" % path)
        result = path
    return result


def parse_fitarg(string):
    for m in FitMode:
        if m.name == string.lower():
            return m
    raise argparse.ArgumentTypeError("unknown fit mode: %s" % string)


def parse_panes(string):
    for m in PageMode:
        if m.name == string.lower():
            return m
    allowed = ", ".join([m.name for m in PageMode])
    raise argparse.ArgumentTypeError("Unsupported page mode: %s. Must be one "
                                     "of: %s." % (string, allowed))


def parse_magnification(string):
    for m in Magnification:
        if m.name == string.lower():
            return m
    try:
        return float(string)
    except ValueError:
        pass
    allowed = ", ".join([m.name for m in Magnification])
    raise argparse.ArgumentTypeError("Unsupported magnification: %s. Must be "
                                     "a floating point number or one of: %s." %
                                     (string, allowed))


def parse_layout(string):
    for l in PageLayout:
        if l.name == string.lower():
            return l
    allowed = ", ".join([l.name for l in PageLayout])
    raise argparse.ArgumentTypeError("Unsupported page layout: %s. Must be "
                                     "one of: %s." % (string, allowed))


def valid_date(string):
    # first try parsing in ISO8601 format
    try:
        return datetime.strptime(string, "%Y-%m-%d")
    except ValueError:
        pass
    try:
        return datetime.strptime(string, "%Y-%m-%dT%H:%M")
    except ValueError:
        pass
    try:
        return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        pass
    # then try dateutil
    try:
        from dateutil import parser
    except ImportError:
        pass
    else:
        try:
            return parser.parse(string)
        except TypeError:
            pass
    # as a last resort, try the local date utility
    try:
        import subprocess
    except ImportError:
        pass
    else:
        try:
            utime = subprocess.check_output(["date", "--date", string, "+%s"])
        except subprocess.CalledProcessError:
            pass
        else:
            return datetime.utcfromtimestamp(int(utime))
    raise argparse.ArgumentTypeError("cannot parse date: %s" % string)


def main():
    rendered_papersizes = ""
    for k, v in sorted(papersizes.items()):
        rendered_papersizes += "    %-8s %s\n" % (papernames[k], v)

    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='''\
Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and
JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000
images with the only added file size coming from the PDF container itself.
Other raster graphics formats are losslessly stored using the same encoding
that PNG uses. Since PDF does not support images with transparency and since
img2pdf aims to never be lossy, input images with an alpha channel are not
supported.

The output is sent to standard output so that it can be redirected into a file
or to another program as part of a shell pipe. To directly write the output
into a file, use the -o or --output option.

Options:
''',
            epilog='''\
Colorspace:
  Currently, the colorspace must be forced for JPEG 2000 images that are not in
  the RGB colorspace.  Available colorspace options are based on Python Imaging
  Library (PIL) short handles.

    RGB      RGB color
    L        Grayscale
    1        Black and white (internally converted to grayscale)
    CMYK     CMYK color
    CMYK;I   CMYK color with inversion (for CMYK JPEG files from Adobe)

Paper sizes:
  You can specify the short hand paper size names shown in the first column in
  the table below as arguments to the --pagesize and --imgsize options.  The
  width and height they are mapping to is shown in the second column.  Giving
  the value in the second column has the same effect as giving the short hand
  in the first column. Appending ^T (a caret/circumflex followed by the letter
  T) turns the paper size from portrait into landscape. The postfix thus
  symbolizes the transpose. The values are case insensitive.

%s

Fit options:
  The img2pdf options for the --fit argument are shown in the first column in
  the table below. The function of these options can be mapped to the geometry
  operators of imagemagick. For users who are familiar with imagemagick, the
  corresponding operator is shown in the second column.  The third column shows
  whether or not the aspect ratio is preserved for that option (same as in
  imagemagick). Just like imagemagick, img2pdf tries hard to preserve the
  aspect ratio, so if the --fit argument is not given, then the default is
  "into" which corresponds to the absence of any operator in imagemagick.
  The value of the --fit option is case insensitive.

    into    |   | Y | The default. Width and height values specify maximum
            |   |   | values.
   ---------+---+---+----------------------------------------------------------
    fill    | ^ | Y | Width and height values specify the minimum values.
   ---------+---+---+----------------------------------------------------------
    exact   | ! | N | Width and height emphatically given.
   ---------+---+---+----------------------------------------------------------
    shrink  | > | Y | Shrinks an image with dimensions larger than the given
            |   |   | ones (and otherwise behaves like "into").
   ---------+---+---+----------------------------------------------------------
    enlarge | < | Y | Enlarges an image with dimensions smaller than the given
            |   |   | ones (and otherwise behaves like "into").

Argument parsing:
  Argument long options can be abbreviated to a prefix if the abbreviation is
  anambiguous. That is, the prefix must match a unique option.

  Beware of your shell interpreting argument values as special characters (like
  the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
  values in single quotes.

  If you want an argument value to start with one or more minus characters, you
  must use the long option name and join them with an equal sign like so:

    $ img2pdf --author=--test--

  If your input file name starts with one or more minus characters, either
  separate the input files from the other arguments by two minus signs:

    $ img2pdf -- --my-file-starts-with-two-minuses.jpg

  Or be more explicit about its relative path by prepending a ./:

    $ img2pdf ./--my-file-starts-with-two-minuses.jpg

  The order of non-positional arguments (all arguments other than the input
  images) does not matter.

Examples:
  Lines starting with a dollar sign denote commands you can enter into your
  terminal. The dollar sign signifies your command prompt. It is not part of
  the command you type.

  Convert two scans in JPEG format to a PDF document.

    $ img2pdf --output out.pdf page1.jpg page2.jpg

  Convert a directory of JPEG images into a PDF with printable A4 pages in
  landscape mode. On each page, the photo takes the maximum amount of space
  while preserving its aspect ratio and a print border of 2 cm on the top and
  bottom and 2.5 cm on the left and right hand side.

    $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg

  On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
  original image size if the image is smaller than that.

    $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg

  Prepare a directory of photos to be printed borderless on photo paper with a
  3:2 aspect ratio and rotate each page so that its orientation is the same as
  the input image.

    $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg

  Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf
  cannot read it from the JPEG2000 file automatically.

    $ img2pdf --output out.pdf --colorspace L input.jp2

Written by Johannes 'josch' Schauer <josch@mister-muffin.de>

Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
''' % rendered_papersizes)

    parser.add_argument(
        'images', metavar='infile', type=input_images, nargs='*',
        help='Specifies the input file(s) in any format that can be read by '
        'the Python Imaging Library (PIL). If no input images are given, then '
        'a single image is read from standard input. The special filename "-" '
        'can be used once to read an image from standard input. To read a '
        'file in the current directory with the filename "-", pass it to '
        'img2pdf by explicitly stating its relative path like "./-".')
    parser.add_argument(
        '-v', '--verbose', action="store_true",
        help='Makes the program operate in verbose mode, printing messages on '
             'standard error.')
    parser.add_argument(
        '-V', '--version', action='version', version='%(prog)s '+__version__,
        help="Prints version information and exits.")

    outargs = parser.add_argument_group(
            title='General output arguments',
            description='Arguments controlling the output format.')

    outargs.add_argument(
        '-o', '--output', metavar='out', type=argparse.FileType('wb'),
        default=sys.stdout.buffer,
        help='Makes the program output to a file instead of standard output.')
    outargs.add_argument(
        '-C', '--colorspace', metavar='colorspace', type=parse_colorspacearg,
        help='''
Forces the PIL colorspace. See the epilogue for a list of possible values.
Usually the PDF colorspace would be derived from the color space of the input
image. This option overwrites the automatically detected colorspace from the
input image and thus forces a certain colorspace in the output PDF /ColorSpace
property. This is useful for JPEG 2000 images with a different colorspace than
RGB.''')

    outargs.add_argument(
        '-D', '--nodate', action="store_true",
        help='Suppresses timestamps in the output and thus makes the output '
              'deterministic between individual runs. You can also manually '
              'set a date using the --moddate and --creationdate options.')

    outargs.add_argument(
        "--without-pdfrw", action="store_true",
        help="By default, img2pdf uses the pdfrw library to create the output "
             "PDF if pdfrw is available. If you want to use the internal PDF "
             "generator of img2pdf even if pdfrw is present, then pass this "
             "option. This can be useful if you want to have unicode metadata "
             "values which pdfrw does not yet support (See "
             "https://github.com/pmaupin/pdfrw/issues/39) or if you want the "
             "PDF code to be more human readable.")

    outargs.add_argument(
        "--first-frame-only", action="store_true",
        help="By default, img2pdf will convert multi-frame images like "
             "multi-page TIFF or animated GIF images to one page per frame. "
             "This option will only let the first frame of every multi-frame "
             "input image be converted into a page in the resulting PDF."
            )

    outargs.add_argument(
        "--pillow-limit-break", action="store_true",
        help="img2pdf uses the Python Imaging Library Pillow to read input "
             "images. Pillow limits the maximum input image size to %d pixels "
             "to prevent decompression bomb denial of service attacks. If "
             "your input image contains more pixels than that, use this "
             "option to disable this safety measure during this run of img2pdf"
             %Image.MAX_IMAGE_PIXELS)

    sizeargs = parser.add_argument_group(
        title='Image and page size and layout arguments',
        description='''\
Every input image will be placed on its own page. The image size is controlled
by the dpi value of the input image or, if unset or missing, the default dpi of
%.2f. By default, each page will have the same size as the image it shows.
Thus, there will be no visible border between the image and the page border by
default. If image size and page size are made different from each other by the
options in this section, the image will always be centered in both dimensions.

The image size and page size can be explicitly set using the --imgsize and
--pagesize options, respectively.  If either dimension of the image size is
specified but the same dimension of the page size is not, then the latter will
be derived from the former using an optional minimal distance between the image
and the page border (given by the --border option) and/or a certain fitting
strategy (given by the --fit option). The converse happens if a dimension of
the page size is set but the same dimension of the image size is not.

Any length value in below options is represented by the meta variable L which
is a floating point value with an optional unit appended (without a space
between them). The default unit is pt (1/72 inch, the PDF unit) and other
allowed units are cm (centimeter), mm (millimeter), and in (inch).

Any size argument of the format LxL in the options below specifies the width
and height of a rectangle where the first L represents the width and the second
L represents the height with an optional unit following each value as described
above.  Either width or height may be omitted. If the height is omitted, the
separating x can be omitted as well. Omitting the width requires to prefix the
height with the separating x. The missing dimension will be chosen so to not
change the image aspect ratio. Instead of giving the width and height
explicitly, you may also specify some (case-insensitive) common page sizes such
as letter and A4.  See the epilogue at the bottom for a complete list of the
valid sizes.

The --fit option scales to fit the image into a rectangle that is either
derived from the --imgsize option or otherwise from the --pagesize option.
If the --border option is given in addition to the --imgsize option while the
--pagesize option is not given, then the page size will be calculated from the
image size, respecting the border setting. If the --border option is given in
addition to the --pagesize option while the --imgsize option is not given, then
the image size will be calculated from the page size, respecting the border
setting. If the --border option is given while both the --pagesize and
--imgsize options are passed, then the --border option will be ignored.

''' % default_dpi)

    sizeargs.add_argument(
            '-S', '--pagesize', metavar='LxL', type=parse_pagesize_rectarg,
            help='''
Sets the size of the PDF pages. The short-option is the upper case S because
it is an mnemonic for being bigger than the image size.''')

    sizeargs.add_argument(
            '-s', '--imgsize', metavar='LxL', type=parse_imgsize_rectarg,
            help='''
Sets the size of the images on the PDF pages.  In addition, the unit dpi is
allowed which will set the image size as a value of dots per inch.  Instead of
a unit, width and height values may also have a percentage sign appended,
indicating a resize of the image by that percentage. The short-option is the
lower case s because it is an mnemonic for being smaller than the page size.
''')
    sizeargs.add_argument(
            '-b', '--border', metavar='L[:L]', type=parse_borderarg,
            help='''
Specifies the minimal distance between the image border and the PDF page
border.  This value Is overwritten by explicit values set by --pagesize or
--imgsize.  The value will be used when calculating page dimensions from the
image dimensions or the other way round. One, or two length values can be given
as an argument, separated by a colon. One value specifies the minimal border on
all four sides. Two values specify the minimal border on the top/bottom and
left/right, respectively. It is not possible to specify asymmetric borders
because images will always be centered on the page.
''')
    sizeargs.add_argument(
            '-f', '--fit', metavar='FIT', type=parse_fitarg,
            default=FitMode.into, help='''

If --imgsize is given, fits the image using these dimensions. Otherwise, fit
the image into the dimensions given by --pagesize.  FIT is one of into, fill,
exact, shrink and enlarge. The default value is "into". See the epilogue at the
bottom for a description of the FIT options.

''')
    sizeargs.add_argument(
            '-a', '--auto-orient', action="store_true",
            help='''
If both dimensions of the page are given via --pagesize, conditionally swaps
these dimensions such that the page orientation is the same as the orientation
of the input image. If the orientation of a page gets flipped, then so do the
values set via the --border option.
''')

    metaargs = parser.add_argument_group(
        title='Arguments setting metadata',
        description='Options handling embedded timestamps, title and author '
                    'information.')
    metaargs.add_argument(
        '--title', metavar='title', type=str,
        help='Sets the title metadata value')
    metaargs.add_argument(
        '--author', metavar='author', type=str,
        help='Sets the author metadata value')
    metaargs.add_argument(
        '--creator', metavar='creator', type=str,
        help='Sets the creator metadata value')
    metaargs.add_argument(
        '--producer', metavar='producer', type=str,
        default="img2pdf " + __version__,
        help='Sets the producer metadata value '
             '(default is: img2pdf ' + __version__ + ')')
    metaargs.add_argument(
        '--creationdate', metavar='creationdate', type=valid_date,
        help='Sets the UTC creation date metadata value in YYYY-MM-DD or '
             'YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
             'understood by python dateutil module or any format understood '
             'by `date --date`')
    metaargs.add_argument(
        '--moddate', metavar='moddate', type=valid_date,
        help='Sets the UTC modification date metadata value in YYYY-MM-DD '
             'or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
             'understood by python dateutil module or any format understood '
             'by `date --date`')
    metaargs.add_argument(
        '--subject', metavar='subject', type=str,
        help='Sets the subject metadata value')
    metaargs.add_argument(
        '--keywords', metavar='kw', type=str, nargs='+',
        help='Sets the keywords metadata value (can be given multiple times)')

    viewerargs = parser.add_argument_group(
        title='PDF viewer arguments',
        description='PDF files can specify how they are meant to be '
                    'presented to the user by a PDF viewer')

    viewerargs.add_argument(
        '--viewer-panes', metavar="PANES", type=parse_panes,
        help='Instruct the PDF viewer which side panes to show. Valid values '
             'are "outlines" and "thumbs". It is not possible to specify both '
             'at the same time.')
    viewerargs.add_argument(
        '--viewer-initial-page', metavar="NUM", type=int,
        help='Instead of showing the first page, instruct the PDF viewer to '
             'show the given page instead. Page numbers start with 1.')
    viewerargs.add_argument(
        '--viewer-magnification', metavar="MAG", type=parse_magnification,
        help='Instruct the PDF viewer to open the PDF with a certain zoom '
             'level. Valid values are either a floating point number giving '
             'the exact zoom level, "fit" (zoom to fit whole page), "fith" '
             '(zoom to fit page width) and "fitbh" (zoom to fit visible page '
             'width).')
    viewerargs.add_argument(
        '--viewer-page-layout', metavar="LAYOUT", type=parse_layout,
        help='Instruct the PDF viewer how to arrange the pages on the screen. '
             'Valid values are "single" (display single pages), "onecolumn" '
             '(one continuous column), "twocolumnright" (two continuous '
             'columns with odd number pages on the right) and "twocolumnleft" '
             '(two continuous columns with odd numbered pages on the left)')
    viewerargs.add_argument(
        '--viewer-fit-window', action="store_true",
        help='Instruct the PDF viewer to resize the window to fit the page '
             'size')
    viewerargs.add_argument(
        '--viewer-center-window', action="store_true",
        help='Instruct the PDF viewer to center the PDF viewer window')
    viewerargs.add_argument(
        '--viewer-fullscreen', action="store_true",
        help='Instruct the PDF viewer to open the PDF in fullscreen mode')

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    if args.pillow_limit_break:
        Image.MAX_IMAGE_PIXELS = None

    layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border,
                                args.fit, args.auto_orient)

    # if no positional arguments were supplied, read a single image from
    # standard input
    if len(args.images) == 0:
        logging.info("reading image from standard input")
        try:
            args.images = [sys.stdin.buffer.read()]
        except KeyboardInterrupt:
            exit(0)

    # with the number of pages being equal to the number of images, the
    # value passed to --viewer-initial-page must be between 1 and that number
    if args.viewer_initial_page is not None:
        if args.viewer_initial_page < 1:
            parser.print_usage(file=sys.stderr)
            logging.error("%s: error: argument --viewer-initial-page: must be "
                          "greater than zero" % parser.prog)
            exit(2)
        if args.viewer_initial_page > len(args.images):
            parser.print_usage(file=sys.stderr)
            logging.error("%s: error: argument --viewer-initial-page: must be "
                          "less than or equal to the total number of pages" %
                          parser.prog)
            exit(2)

    try:
        convert(
            *args.images, title=args.title, author=args.author,
            creator=args.creator, producer=args.producer,
            creationdate=args.creationdate, moddate=args.moddate,
            subject=args.subject, keywords=args.keywords,
            colorspace=args.colorspace, nodate=args.nodate,
            layout_fun=layout_fun, viewer_panes=args.viewer_panes,
            viewer_initial_page=args.viewer_initial_page,
            viewer_magnification=args.viewer_magnification,
            viewer_page_layout=args.viewer_page_layout,
            viewer_fit_window=args.viewer_fit_window,
            viewer_center_window=args.viewer_center_window,
            viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not
            args.without_pdfrw, outputstream=args.output,
            first_frame_only=args.first_frame_only)
    except Exception as e:
        logging.error("error: " + str(e))
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            import traceback
            traceback.print_exc(file=sys.stderr)
        exit(1)


if __name__ == '__main__':
    main()
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								#!/usr/bin/env python3
-												Declare file encoding, since it contains some non-ascii chars.

											
										
										
											2017-05-22 16:41:03 +00:00
+								# -*- coding: utf-8 -*-
-												Added python shebang

Added python shebang to be able to launch script directly
											
										
										
											2014-07-26 14:12:40 +00:00
-												license change from GPL to LGPL

											
										
										
											2014-03-30 06:10:12 +00:00
+								# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
-												add copyright notice

											
										
										
											2013-05-02 06:17:13 +00:00
+								#
-												license change from GPL to LGPL

											
										
										
											2014-03-30 06:10:12 +00:00
+								# This program is free software: you can redistribute it and/or
 								# modify it under the terms of the GNU Lesser General Public
 								# License as published by the Free Software Foundation, either
 								# version 3 of the License, or (at your option) any later
 								# version.
-												add copyright notice

											
										
										
											2013-05-02 06:17:13 +00:00
+								#
 								# This program is distributed in the hope that it will be useful,
 								# but WITHOUT ANY WARRANTY; without even the implied warranty of
 								# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								# GNU General Public License for more details.
 								#
-												license change from GPL to LGPL

											
										
										
											2014-03-30 06:10:12 +00:00
+								# You should have received a copy of the GNU General Public
 								# License along with this program.  If not, see
 								# <http://www.gnu.org/licenses/>.
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
 								import sys
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								import os
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
+								import zlib
 								import argparse
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
+								from PIL import Image
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
+								from datetime import datetime
-												better jp2 parsing based on jpylyzer

											
										
										
											2013-10-21 13:55:47 +00:00
+								from jp2 import parsejp2
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								from enum import Enum
 								from io import BytesIO
 								import logging
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								import struct
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												It is easy enough to create a separate class for the Py2 version.

											
										
										
											2017-05-22 16:41:52 +00:00
+								PY3 = sys.version_info[0] >= 3
-												release version 0.3.0

											
										
										
											2018-07-18 09:30:43 +00:00
+								__version__ = "0.3.0"
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								default_dpi = 96.0
 								papersizes = {
 								    "letter": "8.5inx11in",
 								    "a0":     "841mmx1189mm",
 								    "a1":     "594mmx841mm",
 								    "a2":     "420mmx594mm",
 								    "a3":     "297mmx420mm",
 								    "a4":     "210mmx297mm",
 								    "a5":     "148mmx210mm",
 								    "a6":     "105mmx148mm",
 								}
 								papernames = {
 								    "letter": "Letter",
 								    "a0":     "A0",
 								    "a1":     "A1",
 								    "a2":     "A2",
 								    "a3":     "A3",
 								    "a4":     "A4",
 								    "a5":     "A5",
 								    "a6":     "A6",
 								}
 								FitMode = Enum('FitMode', 'into fill exact shrink enlarge')
 								PageOrientation = Enum('PageOrientation', 'portrait landscape')
-												instead of storing a limited list of supported colorspaces and imageformats, store the items requiring special treatment but do not abort if a value other than the listed ones is supported by PIL

											
										
										
											2016-02-17 17:46:24 +00:00
+								Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other')
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG other')
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								PageMode = Enum('PageMode', 'none outlines thumbs')
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								PageLayout = Enum('PageLayout',
 								                  'single onecolumn twocolumnright twocolumnleft')
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								Magnification = Enum('Magnification', 'fit fith fitbh')
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								ImgSize = Enum('ImgSize', 'abs perc dpi')
 								Unit = Enum('Unit', 'pt cm mm inch')
 								ImgUnit = Enum('ImgUnit', 'pt cm mm inch perc dpi')
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								class NegativeDimensionError(Exception):
 								    pass
 								class UnsupportedColorspaceError(Exception):
 								    pass
 								class ImageOpenError(Exception):
 								    pass
-												change MonochromeJpegError to JpegColorspaceError and add check for jpegs with alpha channel

											
										
										
											2016-02-16 23:37:13 +00:00
+								class JpegColorspaceError(Exception):
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								    pass
 								class PdfTooLargeError(Exception):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    pass
 								# without pdfrw this function is a no-op
 								def my_convert_load(string):
 								    return string
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
-												store times in UTC and understand YYYY-MM-DD, YYYY-MM-DDTHH:MM, YYYY-MM-DDTHH:MM:SS and everything understood by dateutil module and date --date

											
										
										
											2015-03-13 13:29:53 +00:00
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
+								def parse(cont, indent=1):
 								    if type(cont) is dict:
-												add Python 3 support

											
										
										
											2015-01-07 14:56:24 +00:00
+								        return b"<<\n"+b"\n".join(
-												do not encode as utf8 as pdf is ascii, add safer handling across py2/py3

											
										
										
											2015-03-13 10:43:38 +00:00
+								            [4 * indent * b" " + k + b" " + parse(v, indent+1)
-												make output reproducible by sorting and --nodate option

											
										
										
											2015-01-07 15:23:52 +00:00
+								             for k, v in sorted(cont.items())])+b"\n"+4*(indent-1)*b" "+b">>"
-												print floats with four decimal places

											
										
										
											2015-02-16 18:09:34 +00:00
+								    elif type(cont) is int:
-												do not encode as utf8 as pdf is ascii, add safer handling across py2/py3

											
										
										
											2015-03-13 10:43:38 +00:00
+								        return str(cont).encode()
-												print floats with four decimal places

											
										
										
											2015-02-16 18:09:34 +00:00
+								    elif type(cont) is float:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        if int(cont) == cont:
 								            return parse(int(cont))
 								        else:
 								            return ("%0.4f" % cont).rstrip("0").encode()
 								    elif isinstance(cont, MyPdfDict):
-												fix typos

											
										
										
											2016-06-10 14:39:45 +00:00
+								        # if cont got an identifier, then addobj() has been called with it
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # and a link to it will be added, otherwise add it inline
 								        if hasattr(cont, "identifier"):
 								            return ("%d 0 R" % cont.identifier).encode()
 								        else:
 								            return parse(cont.content, indent)
 								    elif type(cont) is str or isinstance(cont, bytes):
-												do not encode as utf8 as pdf is ascii, add safer handling across py2/py3

											
										
										
											2015-03-13 10:43:38 +00:00
+								        if type(cont) is str and type(cont) is not bytes:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise TypeError(
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                "parse must be passed a bytes object in py3. Got: %s" % cont)
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
+								        return cont
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    elif isinstance(cont, list):
-												add Python 3 support

											
										
										
											2015-01-07 14:56:24 +00:00
+								        return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"
-												do not encode as utf8 as pdf is ascii, add safer handling across py2/py3

											
										
										
											2015-03-13 10:43:38 +00:00
+								    else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								        raise TypeError("cannot handle type %s with content %s" % (type(cont),
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                                                                   cont))
 								class MyPdfDict(object):
 								    def __init__(self, *args, **kw):
 								        self.content = dict()
 								        if args:
 								            if len(args) == 1:
 								                args = args[0]
 								            self.content.update(args)
 								        self.stream = None
 								        for key, value in kw.items():
 								            if key == "stream":
 								                self.stream = value
 								                self.content[MyPdfName.Length] = len(value)
 								            elif key == "indirect":
 								                pass
 								            else:
 								                self.content[getattr(MyPdfName, key)] = value
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								    def tostring(self):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        if self.stream is not None:
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
+								            return (
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                ("%d 0 obj\n" % self.identifier).encode() +
-												Added tests for the package.

											
										
										
											2014-03-01 04:51:53 +00:00
+								                parse(self.content) +
-												add Python 3 support

											
										
										
											2015-01-07 14:56:24 +00:00
+								                b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
+								        else:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            return ("%d 0 obj\n" % self.identifier).encode() + \
 								                   parse(self.content) + b"\nendobj\n"
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    def __setitem__(self, key, value):
 								        self.content[key] = value
-												add /Pages reference to /Pages object each /page object

											
										
										
											2012-06-15 14:59:31 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    def __getitem__(self, key):
 								        return self.content[key]
 								class MyPdfName():
 								    def __getattr__(self, name):
 								        return b'/' + name.encode('ascii')
-												Make pep8 compliant again

											
										
										
											2017-01-19 10:22:08 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								MyPdfName = MyPdfName()
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								class MyPdfObject(bytes):
 								    def __new__(cls, string):
 								        return bytes.__new__(cls, string.encode('ascii'))
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								class MyPdfArray(list):
 								    pass
 								class MyPdfWriter():
 								    def __init__(self, version="1.3"):
 								        self.objects = []
-												Start of converting the module to a proper package.

											
										
										
											2014-03-01 03:57:40 +00:00
+								        # create an incomplete pages object so that a /Parent entry can be
 								        # added to each page
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
 								        self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
 								        self.version = version  # default pdf version 1.3
 								        self.pagearray = []
-												add --verbose flag

											
										
										
											2013-08-30 08:45:43 +00:00
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								    def addobj(self, obj):
 								        newid = len(self.objects)+1
 								        obj.identifier = newid
 								        self.objects.append(obj)
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								    def tostream(self, info, stream):
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								        xreftable = list()
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # justification of the random binary garbage in the header from
 								        # adobe:
 								        #
 								        #  > Note: If a PDF file contains binary data, as most do (see Section
 								        #  > 3.1, “Lexical Conventions”), it is recommended that the header
 								        #  > line be immediately followed by a comment line containing at
 								        #  > least four binary characters—that is, characters whose codes are
 								        #  > 128 or greater. This ensures proper behavior of file transfer
 								        #  > applications that inspect data near the beginning of a file to
 								        #  > determine whether to treat the file’s contents as text or as
 								        #  > binary.
 								        #
 								        # the choice of binary characters is arbitrary but those four seem to
 								        # be used elsewhere.
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								        pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii')
 								        pdfheader += b'%\xe2\xe3\xcf\xd3\n'
 								        stream.write(pdfheader)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        # From section 3.4.3 of the PDF Reference (version 1.7):
 								        #
 								        #  > Each entry is exactly 20 bytes long, including the end-of-line
 								        #  > marker.
 								        #  >
 								        #  > [...]
 								        #  >
 								        #  > The format of an in-use entry is
 								        #  > nnnnnnnnnn ggggg n eol
 								        #  > where
 								        #  > nnnnnnnnnn is a 10-digit byte offset
 								        #  > ggggg is a 5-digit generation number
 								        #  > n is a literal keyword identifying this as an in-use entry
 								        #  > eol is a 2-character end-of-line sequence
 								        #  >
 								        #  > [...]
 								        #  >
 								        #  > If the file’s end-of-line marker is a single character (either a
 								        #  > carriage return or a line feed), it is preceded by a single space;
 								        #
-												fix typos

											
										
										
											2016-06-10 14:39:45 +00:00
+								        # Since we chose to use a single character eol marker, we precede it by
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # a space
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								        pos = len(pdfheader)
-												add Python 3 support

											
										
										
											2015-01-07 14:56:24 +00:00
+								        xreftable.append(b"0000000000 65535 f \n")
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								        for o in self.objects:
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								            xreftable.append(("%010d 00000 n \n" % pos).encode())
 								            content = o.tostring()
 								            stream.write(content)
 								            pos += len(content)
 								        xrefoffset = pos
 								        stream.write(b"xref\n")
 								        stream.write(("0 %d\n" % len(xreftable)).encode())
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								        for x in xreftable:
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								            stream.write(x)
 								        stream.write(b"trailer\n")
 								        stream.write(parse({b"/Size": len(xreftable), b"/Info": info,
 								                            b"/Root": self.catalog})+b"\n")
 								        stream.write(b"startxref\n")
 								        stream.write(("%d\n" % xrefoffset).encode())
 								        stream.write(b"%%EOF\n")
 								        return
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    def addpage(self, page):
 								        page[b"/Parent"] = self.pages
 								        self.pagearray.append(page)
 								        self.pages.content[b"/Kids"].append(page)
 								        self.pages.content[b"/Count"] += 1
 								        self.addobj(page)
-												It is easy enough to create a separate class for the Py2 version.

											
										
										
											2017-05-22 16:41:52 +00:00
+								if PY3:
 								    class MyPdfString():
 								        @classmethod
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        def encode(cls, string, hextype=False):
 								            if hextype:
 								                return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >'
 								            else:
 								                try:
 								                    string = string.encode('ascii')
 								                except UnicodeEncodeError:
 								                    string = b"\xfe\xff"+string.encode("utf-16-be")
 								                string = string.replace(b'\\', b'\\\\')
 								                string = string.replace(b'(', b'\\(')
 								                string = string.replace(b')', b'\\)')
 								                return b'(' + string + b')'
-												It is easy enough to create a separate class for the Py2 version.

											
										
										
											2017-05-22 16:41:52 +00:00
+								else:
 								    class MyPdfString(object):
 								        @classmethod
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        def encode(cls, string, hextype=False):
 								            if hextype:
 								                return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >'
 								            else:
 								                # This mimics exactely to what pdfrw does.
 								                string = string.replace(b'\\', b'\\\\')
 								                string = string.replace(b'(', b'\\(')
 								                string = string.replace(b')', b'\\)')
 								                return b'(' + string + b')'
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								class pdfdoc(object):
 								    def __init__(self, version="1.3", title=None, author=None, creator=None,
 								                 producer=None, creationdate=None, moddate=None, subject=None,
 								                 keywords=None, nodate=False, panes=None, initial_page=None,
 								                 magnification=None, page_layout=None, fit_window=False,
 								                 center_window=False, fullscreen=False, with_pdfrw=True):
 								        if with_pdfrw:
 								            try:
 								                from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
 								                self.with_pdfrw = True
 								            except ImportError:
 								                PdfWriter = MyPdfWriter
 								                PdfDict = MyPdfDict
 								                PdfName = MyPdfName
 								                PdfString = MyPdfString
 								                self.with_pdfrw = False
 								        else:
 								            PdfWriter = MyPdfWriter
 								            PdfDict = MyPdfDict
 								            PdfName = MyPdfName
 								            PdfString = MyPdfString
 								            self.with_pdfrw = False
 								        now = datetime.now()
 								        self.info = PdfDict(indirect=True)
 								        def datetime_to_pdfdate(dt):
 								            return dt.strftime("%Y%m%d%H%M%SZ")
 								        if title is not None:
 								            self.info[PdfName.Title] = PdfString.encode(title)
 								        if author is not None:
 								            self.info[PdfName.Author] = PdfString.encode(author)
 								        if creator is not None:
 								            self.info[PdfName.Creator] = PdfString.encode(creator)
-												Allow to not add the /Producer field to the metadata

											
										
										
											2017-01-20 03:45:14 +00:00
+								        if producer is not None and producer != "":
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            self.info[PdfName.Producer] = PdfString.encode(producer)
 								        if creationdate is not None:
 								            self.info[PdfName.CreationDate] = \
 								                PdfString.encode("D:"+datetime_to_pdfdate(creationdate))
 								        elif not nodate:
 								            self.info[PdfName.CreationDate] = \
 								                PdfString.encode("D:"+datetime_to_pdfdate(now))
 								        if moddate is not None:
 								            self.info[PdfName.ModDate] = \
 								                PdfString.encode("D:"+datetime_to_pdfdate(moddate))
 								        elif not nodate:
 								            self.info[PdfName.ModDate] = PdfString.encode(
 								                    "D:"+datetime_to_pdfdate(now))
 								        if subject is not None:
 								            self.info[PdfName.Subject] = PdfString.encode(subject)
 								        if keywords is not None:
 								            self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords))
 								        self.writer = PdfWriter()
 								        self.writer.version = version
 								        # this is done because pdfrw adds info, catalog and pages as the first
 								        # three objects in this order
 								        if not self.with_pdfrw:
 								            self.writer.addobj(self.info)
 								            self.writer.addobj(self.writer.catalog)
 								            self.writer.addobj(self.writer.pages)
 								        self.panes = panes
 								        self.initial_page = initial_page
 								        self.magnification = magnification
 								        self.page_layout = page_layout
 								        self.fit_window = fit_window
 								        self.center_window = center_window
 								        self.fullscreen = fullscreen
 								    def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata,
 								                      imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								                      pageheight, userunit=None, palette=None):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        if self.with_pdfrw:
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								            from pdfrw import PdfDict, PdfName, PdfObject, PdfString
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            from pdfrw.py23_diffs import convert_load
 								        else:
 								            PdfDict = MyPdfDict
 								            PdfName = MyPdfName
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
+								            PdfObject = MyPdfObject
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								            PdfString = MyPdfString
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            convert_load = my_convert_load
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
+								        if color == Colorspace['1'] or color == Colorspace.L:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            colorspace = PdfName.DeviceGray
 								        elif color == Colorspace.RGB:
 								            colorspace = PdfName.DeviceRGB
 								        elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']:
 								            colorspace = PdfName.DeviceCMYK
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        elif color == Colorspace.P:
 								            if self.with_pdfrw:
 								                raise Exception("pdfrw does not support hex strings for palette image input, re-run with --without-pdfrw")
 								            colorspace = [ PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, PdfString.encode(palette, hextype=True)]
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise UnsupportedColorspaceError("unsupported color space: %s"
 								                                             % color.name)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        # either embed the whole jpeg or deflate the bitmap representation
 								        if imgformat is ImageFormat.JPEG:
-												Filter must not be array for PNG output, so never make it an array

											
										
										
											2018-07-17 21:59:55 +00:00
+								            ofilter = PdfName.DCTDecode
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        elif imgformat is ImageFormat.JPEG2000:
-												Filter must not be array for PNG output, so never make it an array

											
										
										
											2018-07-17 21:59:55 +00:00
+								            ofilter = PdfName.JPXDecode
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            self.writer.version = "1.5"  # jpeg2000 needs pdf 1.5
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
+								        elif imgformat is ImageFormat.CCITTGroup4:
-												CCITTFaxDecode filter must be in an array

											
										
										
											2018-07-17 23:14:06 +00:00
+								            ofilter = [PdfName.CCITTFaxDecode]
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        else:
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								            ofilter = PdfName.FlateDecode
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        image = PdfDict(stream=convert_load(imgdata))
 								        image[PdfName.Type] = PdfName.XObject
 								        image[PdfName.Subtype] = PdfName.Image
 								        image[PdfName.Filter] = ofilter
 								        image[PdfName.Width] = imgwidthpx
 								        image[PdfName.Height] = imgheightpx
 								        image[PdfName.ColorSpace] = colorspace
-												fix typos

											
										
										
											2016-06-10 14:39:45 +00:00
+								        # hardcoded as PIL doesn't provide bits for non-jpeg formats
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
+								        if imgformat is ImageFormat.CCITTGroup4:
 								            image[PdfName.BitsPerComponent] = 1
 								        else:
-												handle monochrome png properly

											
										
										
											2018-07-17 22:03:31 +00:00
+								            if color == Colorspace['1']:
 								                image[PdfName.BitsPerComponent] = 1
 								            elif color == Colorspace.P:
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								                if len(palette) <= 2**1:
 								                    image[PdfName.BitsPerComponent] = 1
 								                elif len(palette) <= 2**4:
 								                    image[PdfName.BitsPerComponent] = 4
 								                else:
 								                    image[PdfName.BitsPerComponent] = 8
 								            else:
 								                image[PdfName.BitsPerComponent] = 8
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        if color == Colorspace['CMYK;I']:
 								            # Inverts all four channels
 								            image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
+								        if imgformat is ImageFormat.CCITTGroup4:
 								            decodeparms = PdfDict()
 								            decodeparms[PdfName.K] = -1
 								            decodeparms[PdfName.BlackIs1] = PdfObject('true')
 								            decodeparms[PdfName.Columns] = imgwidthpx
 								            decodeparms[PdfName.Rows] = imgheightpx
 								            image[PdfName.DecodeParms] = [decodeparms]
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        elif imgformat is ImageFormat.PNG:
 								            decodeparms = PdfDict()
 								            decodeparms[PdfName.Predictor] = 15
 								            if color in [ Colorspace.P, Colorspace['1'], Colorspace.L ]:
 								                decodeparms[PdfName.Colors] = 1
 								            else:
 								                decodeparms[PdfName.Colors] = 3
 								            decodeparms[PdfName.Columns] = imgwidthpx
-												handle monochrome png properly

											
										
										
											2018-07-17 22:03:31 +00:00
+								            if color == Colorspace['1']:
 								                decodeparms[PdfName.BitsPerComponent] = 1
 								            elif color == Colorspace.P:
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								                if len(palette) <= 2**1:
 								                    decodeparms[PdfName.BitsPerComponent] = 1
 								                elif len(palette) <= 2**4:
 								                    decodeparms[PdfName.BitsPerComponent] = 4
 								                else:
 								                    decodeparms[PdfName.BitsPerComponent] = 8
 								            else:
 								                decodeparms[PdfName.BitsPerComponent] = 8
 								            image[PdfName.DecodeParms] = decodeparms
-												Implement automatic monochrome CCITT Group4 encoding via Pillow/libtiff

											
										
										
											2016-07-21 10:49:56 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" %
 								                (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii")
-												Changes to pdf page size handling

Changes to `valid_size()`
* accept common page sizes, such as letter and a4.
* parse dimensions of format: AuxBv#, where A is width, u is units, B is height, v is units, # are options.
* accept units: in, cm, mm, pt

Changes to `convert()`:
* resize pages based on dpi calculations
* default resize images into page size (like default resize in imagemagick)
* implement exact resizing (ignore dpi; equiv to ! in imagemagick)

Created `get_ndpi()`:
* provides dpi for page resizing
* implement fill resizing (equiv to ^ in imagemagick)

Other changes:
* default dpi in global variable

											
										
										
											2015-03-20 10:37:30 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        content = PdfDict(stream=convert_load(text))
 								        resources = PdfDict(XObject=PdfDict(Im0=image))
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        page = PdfDict(indirect=True)
 								        page[PdfName.Type] = PdfName.Page
 								        page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
 								        page[PdfName.Resources] = resources
 								        page[PdfName.Contents] = content
-												Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping
the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor feed or rolled paper which may be easier to scan in
one continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.

											
										
										
											2017-07-28 20:52:33 +00:00
+								        if userunit is not None:
 								            # /UserUnit requires PDF 1.6
 								            if self.writer.version < '1.6':
 								                self.writer.version = '1.6'
 								            page[PdfName.UserUnit] = userunit
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        self.writer.addpage(page)
 								        if not self.with_pdfrw:
 								            self.writer.addobj(content)
 								            self.writer.addobj(image)
 								    def tostring(self):
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								        stream = BytesIO()
 								        self.tostream(stream)
 								        return stream.getvalue()
 								    def tostream(self, outputstream):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        if self.with_pdfrw:
 								            from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
 								        else:
 								            PdfDict = MyPdfDict
 								            PdfName = MyPdfName
 								            PdfObject = MyPdfObject
 								            PdfArray = MyPdfArray
 								        NullObject = PdfObject('null')
 								        TrueObject = PdfObject('true')
 								        # We fill the catalog with more information like /ViewerPreferences,
 								        # /PageMode, /PageLayout or /OpenAction because the latter refers to a
 								        # page object which has to be present so that we can get its id.
 								        #
 								        # Furthermore, if using pdfrw, the trailer is cleared every time a page
 								        # is added, so we can only start using it after all pages have been
 								        # written.
 								        if self.with_pdfrw:
 								            catalog = self.writer.trailer.Root
 								        else:
 								            catalog = self.writer.catalog
 								        if self.fullscreen or self.fit_window or self.center_window or \
 								                self.panes is not None:
 								            catalog[PdfName.ViewerPreferences] = PdfDict()
 								        if self.fullscreen:
 								            # this setting might be overwritten later by the page mode
 								            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
 								                    = PdfName.UseNone
 								        if self.panes == PageMode.thumbs:
 								            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
 								                    = PdfName.UseThumbs
 								            # this setting might be overwritten later if fullscreen
 								            catalog[PdfName.PageMode] = PdfName.UseThumbs
 								        elif self.panes == PageMode.outlines:
 								            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
 								                    = PdfName.UseOutlines
 								            # this setting might be overwritten later if fullscreen
 								            catalog[PdfName.PageMode] = PdfName.UseOutlines
 								        elif self.panes in [PageMode.none, None]:
 								            pass
 								        else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ValueError("unknown page mode: %s" % self.panes)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        if self.fit_window:
 								            catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject
 								        if self.center_window:
 								            catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = \
 								                    TrueObject
 								        if self.fullscreen:
 								            catalog[PdfName.PageMode] = PdfName.FullScreen
 								        # see table 8.2 in section 8.2.1 in
 								        # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf
 								        # Fit - Fits the page to the window.
 								        # FitH - Fits the width of the page to the window.
 								        # FitV - Fits the height of the page to the window.
 								        # FitR - Fits the rectangle specified by the four coordinates to the
 								        #        window.
 								        # FitB - Fits the page bounding box to the window. This basically
 								        #        reduces the amount of whitespace (margins) that is displayed
 								        #        and thus focussing more on the text content.
 								        # FitBH - Fits the width of the page bounding box to the window.
 								        # FitBV - Fits the height of the page bounding box to the window.
 								        # by default the initial page is the first one
 								        initial_page = self.writer.pagearray[0]
 								        # we set the open action here to make sure we open on the requested
 								        # initial page but this value might be overwritten by a custom open
-												fix typos

											
										
										
											2016-06-10 14:39:45 +00:00
+								        # action later while still taking the requested initial page into
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # account
 								        if self.initial_page is not None:
 								            initial_page = self.writer.pagearray[self.initial_page - 1]
 								            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.XYZ,
 								                                                    NullObject, NullObject, 0])
 								        if self.magnification == Magnification.fit:
 								            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
 								        elif self.magnification == Magnification.fith:
 								            pagewidth = initial_page[PdfName.MediaBox][2]
 								            catalog[PdfName.OpenAction] = PdfArray(
 								                [initial_page, PdfName.FitH, pagewidth])
 								        elif self.magnification == Magnification.fitbh:
 								            # quick hack to determine the image width on the page
 								            imgwidth = float(initial_page[PdfName.Contents].stream.split()[4])
 								            catalog[PdfName.OpenAction] = PdfArray(
 								                [initial_page, PdfName.FitBH, imgwidth])
 								        elif isinstance(self.magnification, float):
 								            catalog[PdfName.OpenAction] = PdfArray(
 								                [initial_page, PdfName.XYZ, NullObject, NullObject,
 								                 self.magnification])
 								        elif self.magnification is None:
 								            pass
 								        else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ValueError("unknown magnification: %s" % self.magnification)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        if self.page_layout == PageLayout.single:
 								            catalog[PdfName.PageLayout] = PdfName.SinglePage
 								        elif self.page_layout == PageLayout.onecolumn:
 								            catalog[PdfName.PageLayout] = PdfName.OneColumn
 								        elif self.page_layout == PageLayout.twocolumnright:
 								            catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
 								        elif self.page_layout == PageLayout.twocolumnleft:
 								            catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
 								        elif self.page_layout is None:
 								            pass
 								        else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ValueError("unknown page layout: %s" % self.page_layout)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								        # now write out the PDF
 								        if self.with_pdfrw:
 								            self.writer.trailer.Info = self.info
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								            self.writer.write(outputstream)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        else:
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								            self.writer.tostream(self.info, outputstream)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
-												only use jp2 to parse jpeg2000 if PIL doesn't support jpeg2000

											
										
										
											2016-02-17 19:31:46 +00:00
+								    if imgformat == ImageFormat.JPEG2000 \
 								            and rawdata is not None and imgdata is None:
 								        # this codepath gets called if the PIL installation is not able to
 								        # handle JPEG2000 files
 								        imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
 								        if hdpi is None:
 								            hdpi = default_dpi
 								        if vdpi is None:
 								            vdpi = default_dpi
 								        ndpi = (hdpi, vdpi)
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								    else:
 								        imgwidthpx, imgheightpx = imgdata.size
 								        ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
 								        # In python3, the returned dpi value for some tiff images will
 								        # not be an integer but a float. To make the behaviour of
 								        # img2pdf the same between python2 and python3, we convert that
 								        # float into an integer by rounding.
 								        # Search online for the 72.009 dpi problem for more info.
 								        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
 								        ics = imgdata.mode
-												Disallow input images with alpha channel as it cannot be preserved in the PDF and would thus make img2pdf not lossless

											
										
										
											2018-03-24 17:55:14 +00:00
+								    if ics in ["LA", "PA", "RGBA"]:
-												src/img2pdf.py: use logging.warning because warn() is deprecated

											
										
										
											2018-07-17 14:07:01 +00:00
+								        logging.warning("Image contains transparency which cannot be retained in PDF.")
 								        logging.warning("img2pdf will not perform a lossy operation.")
 								        logging.warning("You can remove the alpha channel using imagemagick:")
 								        logging.warning("  $ convert input.png -background white -alpha remove -alpha off output.png")
-												src/img2pdf.py: expand error message about alpha channel

											
										
										
											2018-03-27 08:39:24 +00:00
+								        raise Exception("Refusing to work on images with alpha channel")
-												Disallow input images with alpha channel as it cannot be preserved in the PDF and would thus make img2pdf not lossless

											
										
										
											2018-03-24 17:55:14 +00:00
-												src/img2pdf.py: handle Pillow returning a DPI of zero (closes: #36)

											
										
										
											2017-10-18 08:34:59 +00:00
+								    # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
 								    # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
 								    # header. In that case it can happen that the horizontal and vertical DPI
 								    # are set to zero.
 								    if ndpi == (0, 0):
 								        ndpi = (default_dpi, default_dpi)
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								    logging.debug("input dpi = %d x %d", *ndpi)
 								    if colorspace:
 								        color = colorspace
 								        logging.debug("input colorspace (forced) = %s", color)
 								    else:
 								        color = None
 								        for c in Colorspace:
 								            if c.name == ics:
 								                color = c
 								        if color is None:
-												instead of storing a limited list of supported colorspaces and imageformats, store the items requiring special treatment but do not abort if a value other than the listed ones is supported by PIL

											
										
										
											2016-02-17 17:46:24 +00:00
+								            color = Colorspace.other
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								        if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
 								            # Adobe inverts CMYK JPEGs for some reason, and others
 								            # have followed suit as well. Some software assumes the
 								            # JPEG is inverted if the Adobe tag (APP14), while other
 								            # software assumes all CMYK JPEGs are inverted. I don't
 								            # have enough experience with these to know which is
 								            # better for images currently in the wild, so I'm going
 								            # with the first approach for now.
 								            if "adobe" in imgdata.info:
 								                color = Colorspace['CMYK;I']
 								        logging.debug("input colorspace = %s", color.name)
 								    logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
 								    return (color, ndpi, imgwidthpx, imgheightpx)
-												Break out TIFF G4 transcoding into its own function

											
										
										
											2016-07-21 21:42:48 +00:00
+								def transcode_monochrome(imgdata):
 								    """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
 								    from PIL import TiffImagePlugin
 								    logging.debug("Converting monochrome to CCITT Group4")
 								    # Convert the image to Group 4 in memory. If libtiff is not installed and
 								    # Pillow is not compiled against it, .save() will raise an exception.
 								    newimgio = BytesIO()
-												Create a new PIL image before saving as TIFF to prevent libtiff errors

closes: #46

											
										
										
											2018-07-17 22:58:17 +00:00
 								    # we create a whole new PIL image or otherwise it might happen with some
 								    # input images, that libtiff fails an assert and the whole process is
 								    # killed by a SIGABRT:
 								    #   https://gitlab.mister-muffin.de/josch/img2pdf/issues/46
 								    im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes())
 								    im.save(newimgio, format='TIFF', compression='group4')
-												Break out TIFF G4 transcoding into its own function

											
										
										
											2016-07-21 21:42:48 +00:00
 								    # Open new image in memory
 								    newimgio.seek(0)
 								    newimg = Image.open(newimgio)
 								    # If Pillow is passed an invalid compression argument it will ignore it;
 								    # make sure the image actually got compressed.
 								    if newimg.info['compression'] != 'group4':
 								        raise ValueError("Image not compressed as expected")
 								    # Read the TIFF tags to find the offset(s) of the compressed data strips.
 								    strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
 								    strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
 								    rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP]
 								    # PIL always seems to create a single strip even for very large TIFFs when
 								    # it saves images, so assume we only have to read a single strip.
 								    # A test ~10 GPixel image was still encoded as a single strip. Just to be
 								    # safe check throw an error if there is more than one offset.
 								    if len(strip_offsets) > 1:
 								        raise NotImplementedError("Transcoding multiple strips not supported")
 								    newimgio.seek(strip_offsets[0])
 								    ccittdata = newimgio.read(strip_bytes[0])
 								    return ccittdata
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								def parse_png(rawdata):
 								    pngidat = b""
 								    palette = []
 								    i = 16
 								    while i < len(rawdata):
 								        # once we can require Python >= 3.2 we can use int.from_bytes() instead
 								        n, = struct.unpack('>I', rawdata[i-8:i-4])
 								        if i + n > len(rawdata):
 								            raise Exception("invalid png: %d %d %d"%(i, n, len(rawdata)))
 								        if rawdata[i-4:i] == b"IDAT":
 								            pngidat += rawdata[i:i+n]
 								        elif rawdata[i-4:i] == b"PLTE":
 								            for j in range(i, i+n, 3):
 								                # with int.from_bytes() we would not have to prepend extra zeroes
 								                color, = struct.unpack('>I', b'\x00'+rawdata[j:j+3])
 								                palette.append(color)
 								        i += n
 								        i += 12
 								    return pngidat, palette
-												Break out TIFF G4 transcoding into its own function

											
										
										
											2016-07-21 21:42:48 +00:00
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								def read_images(rawdata, colorspace, first_frame_only=False):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    im = BytesIO(rawdata)
 								    im.seek(0)
-												only use jp2 to parse jpeg2000 if PIL doesn't support jpeg2000

											
										
										
											2016-02-17 19:31:46 +00:00
+								    imgdata = None
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    try:
 								        imgdata = Image.open(im)
 								    except IOError as e:
 								        # test if it is a jpeg2000 image
-												Fix rawdata[:12] bytes/str comparison

											
										
										
											2017-08-20 05:04:16 +00:00
+								        if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ImageOpenError("cannot read input image (not jpeg2000). "
 								                                 "PIL: error reading image: %s" % e)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # image is jpeg2000
 								        imgformat = ImageFormat.JPEG2000
 								    else:
 								        imgformat = None
 								        for f in ImageFormat:
 								            if f.name == imgdata.format:
 								                imgformat = f
 								        if imgformat is None:
-												instead of storing a limited list of supported colorspaces and imageformats, store the items requiring special treatment but do not abort if a value other than the listed ones is supported by PIL

											
										
										
											2016-02-17 17:46:24 +00:00
+								            imgformat = ImageFormat.other
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								    logging.debug("imgformat = %s", imgformat.name)
 								    # depending on the input format, determine whether to pass the raw
 								    # image or the zlib compressed color information
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
 								    # JPEG and JPEG2000 can be embedded into the PDF as-is
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
 								                imgdata, imgformat, default_dpi, colorspace, rawdata)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        if color == Colorspace['1']:
-												change MonochromeJpegError to JpegColorspaceError and add check for jpegs with alpha channel

											
										
										
											2016-02-16 23:37:13 +00:00
+								            raise JpegColorspaceError("jpeg can't be monochrome")
-												support for palette images for better GIF support

											
										
										
											2016-02-16 23:35:40 +00:00
+								        if color == Colorspace['P']:
-												change MonochromeJpegError to JpegColorspaceError and add check for jpegs with alpha channel

											
										
										
											2016-02-16 23:37:13 +00:00
+								            raise JpegColorspaceError("jpeg can't have a color palette")
 								        if color == Colorspace['RGBA']:
 								            raise JpegColorspaceError("jpeg can't have an alpha channel")
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								        im.close()
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])]
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
 								    # We can directly embed the IDAT chunk of PNG images if the PNG is not
 								    # interlaced
 								    #
 								    # PIL does not provide the information whether a PNG was stored interlaced
 								    # or not. Thus, we retrieve that info manually by looking at byte 13 in the
 								    # IHDR chunk. We know where to find that in the file because the IHDR chunk
 								    # must be the first chunk.
 								    if imgformat == ImageFormat.PNG and rawdata[28] == 0:
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
 								                imgdata, imgformat, default_dpi, colorspace, rawdata)
 								        pngidat, palette = parse_png(rawdata)
 								        return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)]
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								    # Everything else has to be encoded
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								    result = []
 								    img_page_count = 0
 								    # loop through all frames of the image (example: multipage TIFF)
 								    while True:
 								        try:
 								            imgdata.seek(img_page_count)
 								        except EOFError:
 								            break
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								        if first_frame_only and img_page_count > 0:
 								            break
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								        logging.debug("Converting frame: %d" % img_page_count)
 								        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
 								                imgdata, imgformat, default_dpi, colorspace)
 								        newimg = None
 								        if color == Colorspace['1']:
 								            try:
 								                ccittdata = transcode_monochrome(imgdata)
 								                imgformat = ImageFormat.CCITTGroup4
 								                result.append((color, ndpi, imgformat, ccittdata,
-												CCITTGroup4 must return empty palette

											
										
										
											2018-07-17 22:57:43 +00:00
+								                               imgwidthpx, imgheightpx, []))
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								                img_page_count += 1
 								                continue
 								            except Exception as e:
 								                logging.debug(e)
 								                logging.debug("Converting colorspace 1 to L")
 								                newimg = imgdata.convert('L')
 								                color = Colorspace.L
 								        elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
 								                       Colorspace["CMYK;I"], Colorspace.P]:
 								            logging.debug("Colorspace is OK: %s", color)
 								            newimg = imgdata
 								        else:
-												src/img2pdf.py: do not allow transparency

											
										
										
											2018-07-17 14:26:32 +00:00
+								            raise ValueError("unknown or unsupported colorspace: %s" % color.name)
-												src/img2pdf.py: do not use PNG encoding for CMYK input

											
										
										
											2018-07-17 14:47:27 +00:00
+								        # the PNG format does not support CMYK, so we fall back to normal
 								        # compression
 								        if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]:
 								            imggz = zlib.compress(newimg.tobytes())
 								            result.append((color, ndpi, imgformat, imggz, imgwidthpx,
 								                           imgheightpx, []))
 								        else:
 								            # cheapo version to retrieve a PNG encoding of the payload is to
 								            # just save it with PIL. In the future this could be replaced by
 								            # dedicated function applying the Paeth PNG filter to the raw pixel
 								            pngbuffer = BytesIO()
 								            newimg.save(pngbuffer, format="png")
 								            pngidat, palette = parse_png(pngbuffer.getvalue())
 								            imgformat = ImageFormat.PNG
 								            result.append((color, ndpi, imgformat, pngidat, imgwidthpx,
 								                           imgheightpx, palette))
-												Interlaced PNGs cannot be directly embedded but have to be re-encoded

											
										
										
											2018-03-24 17:59:02 +00:00
+								        img_page_count += 1
 								    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
 								    # close() method
 								    try:
 								        imgdata.close()
 								    except AttributeError:
 								        pass
 								    im.close()
 								    return result
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								# converts a length in pixels to a length in PDF units (1/72 of an inch)
 								def px_to_pt(length, dpi):
-												Python 2 does not automatically convert integers to floats, so we have to be a bit more explicit.

											
										
										
											2017-05-22 16:32:18 +00:00
+								    return 72.0*length/dpi
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								def cm_to_pt(length):
-												Python 2 does not automatically convert integers to floats, so we have to be a bit more explicit.

											
										
										
											2017-05-22 16:32:18 +00:00
+								    return (72.0*length)/2.54
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								def mm_to_pt(length):
-												Python 2 does not automatically convert integers to floats, so we have to be a bit more explicit.

											
										
										
											2017-05-22 16:32:18 +00:00
+								    return (72.0*length)/25.4
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								def in_to_pt(length):
-												Python 2 does not automatically convert integers to floats, so we have to be a bit more explicit.

											
										
										
											2017-05-22 16:32:18 +00:00
+								    return 72.0*length
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												restore pep8 compliance

											
										
										
											2017-01-20 08:19:30 +00:00
+								def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None,
 								                   auto_orient=False):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
 								        if fitwidth is None and fitheight is None:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ValueError("fitwidth and fitheight cannot both be None")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        # if fit is fill or enlarge then it is okay if one of the dimensions
 								        # are negative but one of them must still be positive
 								        # if fit is not fill or enlarge then both dimensions must be positive
 								        if fit in [FitMode.fill, FitMode.enlarge] and \
-												restore feature that it is allowed to only specify width or height

											
										
										
											2016-02-17 16:51:57 +00:00
+								                fitwidth is not None and fitwidth < 0 and \
 								                fitheight is not None and fitheight < 0:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								            raise ValueError("cannot fit into a rectangle where both "
 								                             "dimensions are negative")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        elif fit not in [FitMode.fill, FitMode.enlarge] and \
-												restore feature that it is allowed to only specify width or height

											
										
										
											2016-02-17 16:51:57 +00:00
+								                ((fitwidth is not None and fitwidth < 0) or
 								                    (fitheight is not None and fitheight < 0)):
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            raise Exception("cannot fit into a rectangle where either "
 								                            "dimensions are negative")
 								        def default():
 								            if fitwidth is not None and fitheight is not None:
 								                newimgwidth = fitwidth
 								                newimgheight = (newimgwidth * imgheight)/imgwidth
 								                if newimgheight > fitheight:
 								                    newimgheight = fitheight
 								                    newimgwidth = (newimgheight * imgwidth)/imgheight
 								            elif fitwidth is None and fitheight is not None:
 								                newimgheight = fitheight
 								                newimgwidth = (newimgheight * imgwidth)/imgheight
 								            elif fitheight is None and fitwidth is not None:
 								                newimgwidth = fitwidth
 								                newimgheight = (newimgwidth * imgheight)/imgwidth
-												support file objects as input

											
										
										
											2015-02-16 06:39:07 +00:00
+								            else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise ValueError("fitwidth and fitheight cannot both be None")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            return newimgwidth, newimgheight
 								        if fit is None or fit == FitMode.into:
 								            return default()
 								        elif fit == FitMode.fill:
 								            if fitwidth is not None and fitheight is not None:
 								                newimgwidth = fitwidth
 								                newimgheight = (newimgwidth * imgheight)/imgwidth
 								                if newimgheight < fitheight:
 								                    newimgheight = fitheight
 								                    newimgwidth = (newimgheight * imgwidth)/imgheight
 								            elif fitwidth is None and fitheight is not None:
 								                newimgheight = fitheight
 								                newimgwidth = (newimgheight * imgwidth)/imgheight
 								            elif fitheight is None and fitwidth is not None:
 								                newimgwidth = fitwidth
 								                newimgheight = (newimgwidth * imgheight)/imgwidth
 								            else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise ValueError("fitwidth and fitheight cannot both be None")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            return newimgwidth, newimgheight
 								        elif fit == FitMode.exact:
 								            if fitwidth is not None and fitheight is not None:
 								                return fitwidth, fitheight
 								            elif fitwidth is None and fitheight is not None:
 								                newimgheight = fitheight
 								                newimgwidth = (newimgheight * imgwidth)/imgheight
 								            elif fitheight is None and fitwidth is not None:
 								                newimgwidth = fitwidth
 								                newimgheight = (newimgwidth * imgheight)/imgwidth
 								            else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise ValueError("fitwidth and fitheight cannot both be None")
-												fix --fit=exact if one value is missing

											
										
										
											2016-02-17 17:11:11 +00:00
+								            return newimgwidth, newimgheight
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        elif fit == FitMode.shrink:
 								            if fitwidth is not None and fitheight is not None:
 								                if imgwidth <= fitwidth and imgheight <= fitheight:
 								                    return imgwidth, imgheight
 								            elif fitwidth is None and fitheight is not None:
 								                if imgheight <= fitheight:
 								                    return imgwidth, imgheight
 								            elif fitheight is None and fitwidth is not None:
 								                if imgwidth <= fitwidth:
 								                    return imgwidth, imgheight
-												support file objects as input

											
										
										
											2015-02-16 06:39:07 +00:00
+								            else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise ValueError("fitwidth and fitheight cannot both be None")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            return default()
 								        elif fit == FitMode.enlarge:
 								            if fitwidth is not None and fitheight is not None:
 								                if imgwidth > fitwidth or imgheight > fitheight:
 								                    return imgwidth, imgheight
 								            elif fitwidth is None and fitheight is not None:
 								                if imgheight > fitheight:
 								                    return imgwidth, imgheight
 								            elif fitheight is None and fitwidth is not None:
 								                if imgwidth > fitwidth:
 								                    return imgwidth, imgheight
 								            else:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise ValueError("fitwidth and fitheight cannot both be None")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            return default()
-												support file objects as input

											
										
										
											2015-02-16 06:39:07 +00:00
+								        else:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            raise NotImplementedError
 								    # if no layout arguments are given, then the image size is equal to the
 								    # page size and will be drawn with the default dpi
 								    if pagesize is None and imgsize is None and border is None:
 								        return default_layout_fun
 								    if pagesize is None and imgsize is None and border is not None:
 								        def layout_fun(imgwidthpx, imgheightpx, ndpi):
 								            imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
 								            imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
 								            pagewidth = imgwidthpdf+2*border[1]
 								            pageheight = imgheightpdf+2*border[0]
 								            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 								        return layout_fun
 								    if border is None:
 								        border = (0, 0)
 								    # if the pagesize is given but the imagesize is not, then the imagesize
 								    # will be calculated from the pagesize, taking into account the border
 								    # and the fitting
 								    if pagesize is not None and imgsize is None:
 								        def layout_fun(imgwidthpx, imgheightpx, ndpi):
 								            if pagesize[0] is not None and pagesize[1] is not None and \
 								                    auto_orient and \
 								                    ((imgwidthpx > imgheightpx and
 								                     pagesize[0] < pagesize[1]) or
 								                     (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
 								                pagewidth, pageheight = pagesize[1], pagesize[0]
 								                newborder = border[1], border[0]
-												Convert unrecognized colorspaces to RGB

Instead of crashing on an unrecognized colorspace, we now do
imgdata.convert('RGB').

											
										
										
											2014-11-06 07:47:42 +00:00
+								            else:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                pagewidth, pageheight = pagesize[0], pagesize[1]
 								                newborder = border
 								            if pagewidth is not None:
-												Fix bug where horizontal and vertical borders where switched when page size was also given. Thanks Erik./a.out| sox -c 1 -e unsigned-integer -r 8000 -t u8 - -d --buffer 32 Closes #30

											
										
										
											2016-05-26 06:19:34 +00:00
+								                fitwidth = pagewidth-2*newborder[1]
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            else:
 								                fitwidth = None
 								            if pageheight is not None:
-												Fix bug where horizontal and vertical borders where switched when page size was also given. Thanks Erik./a.out| sox -c 1 -e unsigned-integer -r 8000 -t u8 - -d --buffer 32 Closes #30

											
										
										
											2016-05-26 06:19:34 +00:00
+								                fitheight = pageheight-2*newborder[0]
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            else:
 								                fitheight = None
 								            if fit in [FitMode.fill, FitMode.enlarge] and \
 								                    fitwidth is not None and fitwidth < 0 and \
 								                    fitheight is not None and fitheight < 0:
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise NegativeDimensionError(
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                    "at least one border dimension musts be smaller than half "
 								                    "the respective page dimension")
 								            elif fit not in [FitMode.fill, FitMode.enlarge] \
 								                    and ((fitwidth is not None and fitwidth < 0) or
 								                         (fitheight is not None and fitheight < 0)):
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								                raise NegativeDimensionError(
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								                    "one border dimension is larger than half of the "
 								                    "respective page dimension")
 								            imgwidthpdf, imgheightpdf = \
 								                fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 								                       px_to_pt(imgheightpx, ndpi[1]),
 								                       fitwidth, fitheight)
 								            if pagewidth is None:
 								                pagewidth = imgwidthpdf+border[1]*2
 								            if pageheight is None:
 								                pageheight = imgheightpdf+border[0]*2
 								            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 								        return layout_fun
 								    def scale_imgsize(s, px, dpi):
 								        if s is None:
 								            return None
 								        mode, value = s
 								        if mode == ImgSize.abs:
 								            return value
 								        if mode == ImgSize.perc:
 								            return (px_to_pt(px, dpi)*value)/100
 								        if mode == ImgSize.dpi:
 								            return px_to_pt(px, value)
 								        raise NotImplementedError
 								    if pagesize is None and imgsize is not None:
 								        def layout_fun(imgwidthpx, imgheightpx, ndpi):
 								            imgwidthpdf, imgheightpdf = \
 								                    fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 								                           px_to_pt(imgheightpx, ndpi[1]),
 								                           scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
 								                           scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
 								            pagewidth = imgwidthpdf+2*border[1]
 								            pageheight = imgheightpdf+2*border[0]
 								            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 								        return layout_fun
 								    if pagesize is not None and imgsize is not None:
 								        def layout_fun(imgwidthpx, imgheightpx, ndpi):
 								            if pagesize[0] is not None and pagesize[1] is not None and \
 								                    auto_orient and \
 								                    ((imgwidthpx > imgheightpx and
 								                      pagesize[0] < pagesize[1]) or
 								                     (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
 								                pagewidth, pageheight = pagesize[1], pagesize[0]
 								            else:
 								                pagewidth, pageheight = pagesize[0], pagesize[1]
 								            imgwidthpdf, imgheightpdf = \
 								                fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 								                       px_to_pt(imgheightpx, ndpi[1]),
 								                       scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
 								                       scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
 								            return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 								        return layout_fun
 								    raise NotImplementedError
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
-												add options to specify pdf dimensions in points

add options specify output pdf dimensions in points:  -x width;  -y height.
											
										
										
											2014-08-04 15:25:07 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
 								    imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0])
 								    imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1])
 								    return pagewidth, pageheight, imgwidthpdf, imgheightpdf
-												src/img2pdf.py: add get_fixed_dpi_layout_fun

											
										
										
											2016-02-13 07:59:52 +00:00
+								def get_fixed_dpi_layout_fun(fixed_dpi):
 								    """Layout function that overrides whatever DPI is claimed in input images.
 								    >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
 								    >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
 								    """
 								    def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
 								        return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)
 								    return fixed_dpi_layout_fun
-												Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping
the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor feed or rolled paper which may be easier to scan in
one continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.

											
										
										
											2017-07-28 20:52:33 +00:00
+								def find_scale(pagewidth, pageheight):
 								    """Find the power of 10 (10, 100, 1000...) that will reduce the scale
 								    below the PDF specification limit of 14400 PDF units (=200 inches)"""
 								    from math import log10, ceil
 								    major = max(pagewidth, pageheight)
 								    oversized = major / 14400.0
 								    return 10 ** ceil(log10(oversized))
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								# given one or more input image, depending on outputstream, either return a
 								# string containing the whole PDF if outputstream is None or write the PDF
 								# data to the given file-like object and return None
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								#
 								# Input images can be given as file like objects (they must implement read()),
 								# as a binary string representing the image content or as filenames to the
 								# images.
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								def convert(*images, **kwargs):
 								    _default_kwargs = dict(
 								        title=None,
 								        author=None, creator=None, producer=None, creationdate=None,
 								        moddate=None, subject=None, keywords=None, colorspace=None,
 								        nodate=False, layout_fun=default_layout_fun, viewer_panes=None,
 								        viewer_initial_page=None, viewer_magnification=None,
 								        viewer_page_layout=None, viewer_fit_window=False,
 								        viewer_center_window=False, viewer_fullscreen=False,
-												Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping
the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor feed or rolled paper which may be easier to scan in
one continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.

											
										
										
											2017-07-28 20:52:33 +00:00
+								        with_pdfrw=True, outputstream=None, first_frame_only=False,
 								        allow_oversized=True)
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								    for kwname, default in _default_kwargs.items():
 								        if kwname not in kwargs:
 								            kwargs[kwname] = default
 								    pdf = pdfdoc(
 								        "1.3",
 								        kwargs['title'], kwargs['author'], kwargs['creator'],
 								        kwargs['producer'], kwargs['creationdate'], kwargs['moddate'],
 								        kwargs['subject'], kwargs['keywords'], kwargs['nodate'],
 								        kwargs['viewer_panes'], kwargs['viewer_initial_page'],
 								        kwargs['viewer_magnification'], kwargs['viewer_page_layout'],
 								        kwargs['viewer_fit_window'], kwargs['viewer_center_window'],
 								        kwargs['viewer_fullscreen'], kwargs['with_pdfrw'])
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												restore backwards compatibility and allow passing images as a list

											
										
										
											2017-01-19 19:32:16 +00:00
+								    # backwards compatibility with older img2pdf versions where the first
 								    # argument to the function had to be given as a list
 								    if len(images) == 1:
 								        # if only one argument was given and it is a list, expand it
 								        if isinstance(images[0], (list, tuple)):
 								            images = images[0]
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								    if not isinstance(images, (list, tuple)):
 								        images = [images]
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								    for img in images:
 								        # img is allowed to be a path, a binary string representing image data
 								        # or a file-like object (really anything that implements read())
 								        try:
 								            rawdata = img.read()
 								        except AttributeError:
-												If input to convert() doesn't have a write() member, then it must be str or bytes type

											
										
										
											2017-01-19 19:32:46 +00:00
+								            if not isinstance(img, (str, bytes)):
 								                raise TypeError(
 								                        "Neither implements read() nor is str or bytes")
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								            # the thing doesn't have a read() function, so try if we can treat
 								            # it as a file name
 								            try:
 								                with open(img, "rb") as f:
 								                    rawdata = f.read()
 								            except:
 								                # whatever the exception is (string could contain NUL
 								                # characters or the path could just not exist) it's not a file
 								                # name so we now try treating it as raw image content
 								                rawdata = img
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								        for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, palette \
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								                in read_images(
 								                    rawdata, kwargs['colorspace'], kwargs['first_frame_only']):
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								            pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								                kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi)
-												Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping
the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor feed or rolled paper which may be easier to scan in
one continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.

											
										
										
											2017-07-28 20:52:33 +00:00
 								            userunit = None
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								            if pagewidth < 3.00 or pageheight < 3.00:
 								                logging.warning("pdf width or height is below 3.00 - too "
 								                                "small for some viewers!")
 								            elif pagewidth > 14400.0 or pageheight > 14400.0:
-												Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping
the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor feed or rolled paper which may be easier to scan in
one continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.

											
										
										
											2017-07-28 20:52:33 +00:00
+								                if kwargs['allow_oversized']:
 								                    userunit = find_scale(pagewidth, pageheight)
 								                    pagewidth /= userunit
 								                    pageheight /= userunit
 								                    imgwidthpdf /= userunit
 								                    imgheightpdf /= userunit
 								                else:
 								                    raise PdfTooLargeError(
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								                        "pdf width or height must not exceed 200 inches.")
 								            # the image is always centered on the page
 								            imgxpdf = (pagewidth - imgwidthpdf)/2.0
 								            imgypdf = (pageheight - imgheightpdf)/2.0
 								            pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
 								                              imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
-												Proof of concept of using PDF DecodeParms for storing pixel data with PNG compression

											
										
										
											2018-03-15 10:31:36 +00:00
+								                              imgypdf, pagewidth, pageheight, userunit, palette)
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
-												Produce port that is fully API compatible.

											
										
										
											2017-05-22 16:43:27 +00:00
+								    if kwargs['outputstream']:
 								        pdf.tostream(kwargs['outputstream'])
-												allow writing the pdf to a file-like object instead of storing everything in memory

											
										
										
											2016-02-13 07:51:20 +00:00
+								        return
-												major refactoring

											
										
										
											2013-10-23 10:34:07 +00:00
+								    return pdf.tostring()
-												initial commit

											
										
										
											2012-03-29 09:08:32 +00:00
-												Changes to pdf page size handling

Changes to `valid_size()`
* accept common page sizes, such as letter and a4.
* parse dimensions of format: AuxBv#, where A is width, u is units, B is height, v is units, # are options.
* accept units: in, cm, mm, pt

Changes to `convert()`:
* resize pages based on dpi calculations
* default resize images into page size (like default resize in imagemagick)
* implement exact resizing (ignore dpi; equiv to ! in imagemagick)

Created `get_ndpi()`:
* provides dpi for page resizing
* implement fill resizing (equiv to ^ in imagemagick)

Other changes:
* default dpi in global variable

											
										
										
											2015-03-20 10:37:30 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								def parse_num(num, name):
 								    if num == '':
 								        return None
 								    unit = None
 								    if num.endswith("pt"):
 								        unit = Unit.pt
 								    elif num.endswith("cm"):
 								        unit = Unit.cm
 								    elif num.endswith("mm"):
 								        unit = Unit.mm
 								    elif num.endswith("in"):
 								        unit = Unit.inch
 								    else:
 								        try:
 								            num = float(num)
 								        except ValueError:
 								            msg = "%s is not a floating point number and doesn't have a " \
 								                  "valid unit: %s" % (name, num)
 								            raise argparse.ArgumentTypeError(msg)
 								    if unit is None:
 								        unit = Unit.pt
 								    else:
 								        num = num[:-2]
 								        try:
 								            num = float(num)
 								        except ValueError:
 								            msg = "%s is not a floating point number: %s" % (name, num)
 								            raise argparse.ArgumentTypeError(msg)
 								    if unit == Unit.cm:
 								        num = cm_to_pt(num)
 								    elif unit == Unit.mm:
 								        num = mm_to_pt(num)
 								    elif unit == Unit.inch:
 								        num = in_to_pt(num)
 								    return num
 								def parse_imgsize_num(num, name):
 								    if num == '':
 								        return None
 								    unit = None
 								    if num.endswith("pt"):
 								        unit = ImgUnit.pt
 								    elif num.endswith("cm"):
 								        unit = ImgUnit.cm
 								    elif num.endswith("mm"):
 								        unit = ImgUnit.mm
 								    elif num.endswith("in"):
 								        unit = ImgUnit.inch
 								    elif num.endswith("dpi"):
 								        unit = ImgUnit.dpi
 								    elif num.endswith("%"):
 								        unit = ImgUnit.perc
 								    else:
 								        try:
 								            num = float(num)
 								        except ValueError:
 								            msg = "%s is not a floating point number and doesn't have a " \
 								                  "valid unit: %s" % (name, num)
 								            raise argparse.ArgumentTypeError(msg)
 								    if unit is None:
 								        unit = ImgUnit.pt
 								    else:
-												fix parsing of dpi and % units

											
										
										
											2016-02-10 08:32:38 +00:00
+								        # strip off unit from string
 								        if unit == ImgUnit.dpi:
 								            num = num[:-3]
 								        elif unit == ImgUnit.perc:
 								            num = num[:-1]
 								        else:
 								            num = num[:-2]
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        try:
 								            num = float(num)
 								        except ValueError:
 								            msg = "%s is not a floating point number: %s" % (name, num)
 								            raise argparse.ArgumentTypeError(msg)
 								    if unit == ImgUnit.cm:
 								        num = (ImgSize.abs, cm_to_pt(num))
 								    elif unit == ImgUnit.mm:
 								        num = (ImgSize.abs, mm_to_pt(num))
 								    elif unit == ImgUnit.inch:
 								        num = (ImgSize.abs, in_to_pt(num))
 								    elif unit == ImgUnit.pt:
 								        num = (ImgSize.abs, num)
 								    elif unit == ImgUnit.dpi:
 								        num = (ImgSize.dpi, num)
 								    elif unit == ImgUnit.perc:
 								        num = (ImgSize.perc, num)
 								    return num
 								def parse_pagesize_rectarg(string):
 								    transposed = string.endswith("^T")
 								    if transposed:
 								        string = string[:-2]
 								    if papersizes.get(string.lower()):
 								        string = papersizes[string.lower()]
 								    if 'x' not in string:
-												make the separating x optional when specifying the width without the height

											
										
										
											2016-02-17 17:16:17 +00:00
+								        # if there is no separating "x" in the string, then the string is
 								        # interpreted as the width
 								        w = parse_num(string, "width")
 								        h = None
 								    else:
 								        w, h = string.split('x', 1)
 								        w = parse_num(w, "width")
 								        h = parse_num(h, "height")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    if transposed:
 								        w, h = h, w
 								    if w is None and h is None:
 								        raise argparse.ArgumentTypeError("at least one dimension must be "
 								                                         "specified")
 								    return w, h
 								def parse_imgsize_rectarg(string):
 								    transposed = string.endswith("^T")
 								    if transposed:
 								        string = string[:-2]
 								    if papersizes.get(string.lower()):
 								        string = papersizes[string.lower()]
 								    if 'x' not in string:
-												make the separating x optional when specifying the width without the height

											
										
										
											2016-02-17 17:16:17 +00:00
+								        # if there is no separating "x" in the string, then the string is
 								        # interpreted as the width
 								        w = parse_imgsize_num(string, "width")
 								        h = None
 								    else:
 								        w, h = string.split('x', 1)
 								        w = parse_imgsize_num(w, "width")
 								        h = parse_imgsize_num(h, "height")
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    if transposed:
 								        w, h = h, w
 								    if w is None and h is None:
 								        raise argparse.ArgumentTypeError("at least one dimension must be "
 								                                         "specified")
 								    return w, h
 								def parse_colorspacearg(string):
 								    for c in Colorspace:
 								        if c.name == string:
 								            return c
 								    allowed = ", ".join([c.name for c in Colorspace])
 								    raise argparse.ArgumentTypeError("Unsupported colorspace: %s. Must be one "
 								                                     "of: %s." % (string, allowed))
 								def parse_borderarg(string):
 								    if ':' in string:
 								        h, v = string.split(':', 1)
 								        if h == '':
 								            raise argparse.ArgumentTypeError("missing value before colon")
 								        if v == '':
 								            raise argparse.ArgumentTypeError("missing value after colon")
 								    else:
 								        if string == '':
 								            raise argparse.ArgumentTypeError("border option cannot be empty")
 								        h, v = string, string
 								    h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border")
 								    if h is None and v is None:
 								        raise argparse.ArgumentTypeError("missing value")
 								    return h, v
 								def input_images(path):
 								    if path == '-':
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								        # we slurp in all data from stdin because we need to seek in it later
 								        result = sys.stdin.buffer.read()
 								        if len(result) == 0:
 								            raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
-												Changes to pdf page size handling

Changes to `valid_size()`
* accept common page sizes, such as letter and a4.
* parse dimensions of format: AuxBv#, where A is width, u is units, B is height, v is units, # are options.
* accept units: in, cm, mm, pt

Changes to `convert()`:
* resize pages based on dpi calculations
* default resize images into page size (like default resize in imagemagick)
* implement exact resizing (ignore dpi; equiv to ! in imagemagick)

Created `get_ndpi()`:
* provides dpi for page resizing
* implement fill resizing (equiv to ^ in imagemagick)

Other changes:
* default dpi in global variable

											
										
										
											2015-03-20 10:37:30 +00:00
+								    else:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        try:
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								            if os.path.getsize(path) == 0:
 								                raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
 								            # test-read a byte from it so that we can abort early in case
 								            # we cannot read data from the file
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								            with open(path, "rb") as im:
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								                im.read(1)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								        except IsADirectoryError:
 								            raise argparse.ArgumentTypeError(
 								                "\"%s\" is a directory" % path)
 								        except PermissionError:
 								            raise argparse.ArgumentTypeError(
 								                "\"%s\" permission denied" % path)
 								        except FileNotFoundError:
 								            raise argparse.ArgumentTypeError(
 								                "\"%s\" does not exist" % path)
-												input images can now be file like objects, binary strings with the image content or filenames

											
										
										
											2016-02-14 17:54:59 +00:00
+								        result = path
 								    return result
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								def parse_fitarg(string):
 								    for m in FitMode:
 								        if m.name == string.lower():
 								            return m
 								    raise argparse.ArgumentTypeError("unknown fit mode: %s" % string)
 								def parse_panes(string):
 								    for m in PageMode:
 								        if m.name == string.lower():
 								            return m
 								    allowed = ", ".join([m.name for m in PageMode])
 								    raise argparse.ArgumentTypeError("Unsupported page mode: %s. Must be one "
 								                                     "of: %s." % (string, allowed))
 								def parse_magnification(string):
 								    for m in Magnification:
 								        if m.name == string.lower():
 								            return m
 								    try:
 								        return float(string)
 								    except ValueError:
 								        pass
 								    allowed = ", ".join([m.name for m in Magnification])
 								    raise argparse.ArgumentTypeError("Unsupported magnification: %s. Must be "
 								                                     "a floating point number or one of: %s." %
 								                                     (string, allowed))
-												Changes to pdf page size handling

Changes to `valid_size()`
* accept common page sizes, such as letter and a4.
* parse dimensions of format: AuxBv#, where A is width, u is units, B is height, v is units, # are options.
* accept units: in, cm, mm, pt

Changes to `convert()`:
* resize pages based on dpi calculations
* default resize images into page size (like default resize in imagemagick)
* implement exact resizing (ignore dpi; equiv to ! in imagemagick)

Created `get_ndpi()`:
* provides dpi for page resizing
* implement fill resizing (equiv to ^ in imagemagick)

Other changes:
* default dpi in global variable

											
										
										
											2015-03-20 10:37:30 +00:00
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								def parse_layout(string):
 								    for l in PageLayout:
 								        if l.name == string.lower():
 								            return l
 								    allowed = ", ".join([l.name for l in PageLayout])
 								    raise argparse.ArgumentTypeError("Unsupported page layout: %s. Must be "
 								                                     "one of: %s." % (string, allowed))
-												Start of converting the module to a proper package.

											
										
										
											2014-03-01 03:57:40 +00:00
 								def valid_date(string):
-												store times in UTC and understand YYYY-MM-DD, YYYY-MM-DDTHH:MM, YYYY-MM-DDTHH:MM:SS and everything understood by dateutil module and date --date

											
										
										
											2015-03-13 13:29:53 +00:00
+								    # first try parsing in ISO8601 format
 								    try:
 								        return datetime.strptime(string, "%Y-%m-%d")
 								    except ValueError:
 								        pass
 								    try:
 								        return datetime.strptime(string, "%Y-%m-%dT%H:%M")
 								    except ValueError:
 								        pass
 								    try:
 								        return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")
 								    except ValueError:
 								        pass
 								    # then try dateutil
 								    try:
 								        from dateutil import parser
 								    except ImportError:
 								        pass
 								    else:
 								        try:
 								            return parser.parse(string)
 								        except TypeError:
 								            pass
 								    # as a last resort, try the local date utility
 								    try:
 								        import subprocess
 								    except ImportError:
 								        pass
 								    else:
 								        try:
 								            utime = subprocess.check_output(["date", "--date", string, "+%s"])
 								        except subprocess.CalledProcessError:
 								            pass
 								        else:
 								            return datetime.utcfromtimestamp(int(utime))
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
 								def main():
 								    rendered_papersizes = ""
 								    for k, v in sorted(papersizes.items()):
 								        rendered_papersizes += "    %-8s %s\n" % (papernames[k], v)
 								    parser = argparse.ArgumentParser(
 								            formatter_class=argparse.RawDescriptionHelpFormatter,
 								            description='''\
-												Reflect the fact that we now use PNG compression in the --help output

											
										
										
											2018-03-24 18:53:41 +00:00
+								Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and
 								JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000
 								images with the only added file size coming from the PDF container itself.
 								Other raster graphics formats are losslessly stored using the same encoding
 								that PNG uses. Since PDF does not support images with transparency and since
 								img2pdf aims to never be lossy, input images with an alpha channel are not
 								supported.
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								The output is sent to standard output so that it can be redirected into a file
 								or to another program as part of a shell pipe. To directly write the output
 								into a file, use the -o or --output option.
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
 								Options:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								''',
 								            epilog='''\
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Colorspace:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								  Currently, the colorspace must be forced for JPEG 2000 images that are not in
 								  the RGB colorspace.  Available colorspace options are based on Python Imaging
 								  Library (PIL) short handles.
 								    RGB      RGB color
 								    L        Grayscale
 Black and white (internally converted to grayscale)
 								    CMYK     CMYK color
 								    CMYK;I   CMYK color with inversion (for CMYK JPEG files from Adobe)
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Paper sizes:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								  You can specify the short hand paper size names shown in the first column in
 								  the table below as arguments to the --pagesize and --imgsize options.  The
 								  width and height they are mapping to is shown in the second column.  Giving
 								  the value in the second column has the same effect as giving the short hand
 								  in the first column. Appending ^T (a caret/circumflex followed by the letter
 								  T) turns the paper size from portrait into landscape. The postfix thus
 								  symbolizes the transpose. The values are case insensitive.
 								%s
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Fit options:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								  The img2pdf options for the --fit argument are shown in the first column in
 								  the table below. The function of these options can be mapped to the geometry
 								  operators of imagemagick. For users who are familiar with imagemagick, the
 								  corresponding operator is shown in the second column.  The third column shows
 								  whether or not the aspect ratio is preserved for that option (same as in
 								  imagemagick). Just like imagemagick, img2pdf tries hard to preserve the
 								  aspect ratio, so if the --fit argument is not given, then the default is
 								  "into" which corresponds to the absence of any operator in imagemagick.
 								  The value of the --fit option is case insensitive.
 								    into    |   | Y | The default. Width and height values specify maximum
 								            |   |   | values.
 								   ---------+---+---+----------------------------------------------------------
 								    fill    | ^ | Y | Width and height values specify the minimum values.
 								   ---------+---+---+----------------------------------------------------------
 								    exact   | ! | N | Width and height emphatically given.
 								   ---------+---+---+----------------------------------------------------------
 								    shrink  | > | Y | Shrinks an image with dimensions larger than the given
 								            |   |   | ones (and otherwise behaves like "into").
 								   ---------+---+---+----------------------------------------------------------
 								    enlarge | < | Y | Enlarges an image with dimensions smaller than the given
 								            |   |   | ones (and otherwise behaves like "into").
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Argument parsing:
 								  Argument long options can be abbreviated to a prefix if the abbreviation is
 								  anambiguous. That is, the prefix must match a unique option.
 								  Beware of your shell interpreting argument values as special characters (like
 								  the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
 								  values in single quotes.
 								  If you want an argument value to start with one or more minus characters, you
 								  must use the long option name and join them with an equal sign like so:
 								    $ img2pdf --author=--test--
 								  If your input file name starts with one or more minus characters, either
 								  separate the input files from the other arguments by two minus signs:
 								    $ img2pdf -- --my-file-starts-with-two-minuses.jpg
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								  Or be more explicit about its relative path by prepending a ./:
 								    $ img2pdf ./--my-file-starts-with-two-minuses.jpg
 								  The order of non-positional arguments (all arguments other than the input
 								  images) does not matter.
 								Examples:
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								  Lines starting with a dollar sign denote commands you can enter into your
 								  terminal. The dollar sign signifies your command prompt. It is not part of
 								  the command you type.
 								  Convert two scans in JPEG format to a PDF document.
 								    $ img2pdf --output out.pdf page1.jpg page2.jpg
 								  Convert a directory of JPEG images into a PDF with printable A4 pages in
 								  landscape mode. On each page, the photo takes the maximum amount of space
 								  while preserving its aspect ratio and a print border of 2 cm on the top and
 								  bottom and 2.5 cm on the left and right hand side.
 								    $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
 								  On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
 								  original image size if the image is smaller than that.
 								    $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg
 								  Prepare a directory of photos to be printed borderless on photo paper with a
 :2 aspect ratio and rotate each page so that its orientation is the same as
 								  the input image.
 								    $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg
 								  Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf
 								  cannot read it from the JPEG2000 file automatically.
 								    $ img2pdf --output out.pdf --colorspace L input.jp2
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Written by Johannes 'josch' Schauer <josch@mister-muffin.de>
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								''' % rendered_papersizes)
 								    parser.add_argument(
 								        'images', metavar='infile', type=input_images, nargs='*',
 								        help='Specifies the input file(s) in any format that can be read by '
 								        'the Python Imaging Library (PIL). If no input images are given, then '
 								        'a single image is read from standard input. The special filename "-" '
 								        'can be used once to read an image from standard input. To read a '
 								        'file in the current directory with the filename "-", pass it to '
 								        'img2pdf by explicitly stating its relative path like "./-".')
 								    parser.add_argument(
 								        '-v', '--verbose', action="store_true",
 								        help='Makes the program operate in verbose mode, printing messages on '
 								             'standard error.')
 								    parser.add_argument(
 								        '-V', '--version', action='version', version='%(prog)s '+__version__,
 								        help="Prints version information and exits.")
 								    outargs = parser.add_argument_group(
 								            title='General output arguments',
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								            description='Arguments controlling the output format.')
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								    outargs.add_argument(
 								        '-o', '--output', metavar='out', type=argparse.FileType('wb'),
 								        default=sys.stdout.buffer,
 								        help='Makes the program output to a file instead of standard output.')
 								    outargs.add_argument(
 								        '-C', '--colorspace', metavar='colorspace', type=parse_colorspacearg,
 								        help='''
 								Forces the PIL colorspace. See the epilogue for a list of possible values.
 								Usually the PDF colorspace would be derived from the color space of the input
 								image. This option overwrites the automatically detected colorspace from the
 								input image and thus forces a certain colorspace in the output PDF /ColorSpace
 								property. This is useful for JPEG 2000 images with a different colorspace than
 								RGB.''')
 								    outargs.add_argument(
 								        '-D', '--nodate', action="store_true",
 								        help='Suppresses timestamps in the output and thus makes the output '
 								              'deterministic between individual runs. You can also manually '
 								              'set a date using the --moddate and --creationdate options.')
 								    outargs.add_argument(
 								        "--without-pdfrw", action="store_true",
 								        help="By default, img2pdf uses the pdfrw library to create the output "
 								             "PDF if pdfrw is available. If you want to use the internal PDF "
 								             "generator of img2pdf even if pdfrw is present, then pass this "
 								             "option. This can be useful if you want to have unicode metadata "
 								             "values which pdfrw does not yet support (See "
 								             "https://github.com/pmaupin/pdfrw/issues/39) or if you want the "
 								             "PDF code to be more human readable.")
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								    outargs.add_argument(
 								        "--first-frame-only", action="store_true",
 								        help="By default, img2pdf will convert multi-frame images like "
 								             "multi-page TIFF or animated GIF images to one page per frame. "
 								             "This option will only let the first frame of every multi-frame "
 								             "input image be converted into a page in the resulting PDF."
 								            )
-												Add option --pillow-limit-break to force Pillow to accept large input images

											
										
										
											2018-03-24 18:47:03 +00:00
+								    outargs.add_argument(
 								        "--pillow-limit-break", action="store_true",
 								        help="img2pdf uses the Python Imaging Library Pillow to read input "
 								             "images. Pillow limits the maximum input image size to %d pixels "
 								             "to prevent decompression bomb denial of service attacks. If "
 								             "your input image contains more pixels than that, use this "
 								             "option to disable this safety measure during this run of img2pdf"
 								             %Image.MAX_IMAGE_PIXELS)
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    sizeargs = parser.add_argument_group(
 								        title='Image and page size and layout arguments',
 								        description='''\
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								Every input image will be placed on its own page. The image size is controlled
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								by the dpi value of the input image or, if unset or missing, the default dpi of
 								%.2f. By default, each page will have the same size as the image it shows.
 								Thus, there will be no visible border between the image and the page border by
 								default. If image size and page size are made different from each other by the
 								options in this section, the image will always be centered in both dimensions.
 								The image size and page size can be explicitly set using the --imgsize and
 								--pagesize options, respectively.  If either dimension of the image size is
 								specified but the same dimension of the page size is not, then the latter will
 								be derived from the former using an optional minimal distance between the image
 								and the page border (given by the --border option) and/or a certain fitting
 								strategy (given by the --fit option). The converse happens if a dimension of
 								the page size is set but the same dimension of the image size is not.
 								Any length value in below options is represented by the meta variable L which
 								is a floating point value with an optional unit appended (without a space
 								between them). The default unit is pt (1/72 inch, the PDF unit) and other
 								allowed units are cm (centimeter), mm (millimeter), and in (inch).
 								Any size argument of the format LxL in the options below specifies the width
 								and height of a rectangle where the first L represents the width and the second
 								L represents the height with an optional unit following each value as described
-												make the separating x optional when specifying the width without the height

											
										
										
											2016-02-17 17:16:17 +00:00
+								above.  Either width or height may be omitted. If the height is omitted, the
 								separating x can be omitted as well. Omitting the width requires to prefix the
 								height with the separating x. The missing dimension will be chosen so to not
 								change the image aspect ratio. Instead of giving the width and height
 								explicitly, you may also specify some (case-insensitive) common page sizes such
 								as letter and A4.  See the epilogue at the bottom for a complete list of the
 								valid sizes.
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
 								The --fit option scales to fit the image into a rectangle that is either
-												fix typos

											
										
										
											2016-06-10 14:39:45 +00:00
+								derived from the --imgsize option or otherwise from the --pagesize option.
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								If the --border option is given in addition to the --imgsize option while the
 								--pagesize option is not given, then the page size will be calculated from the
 								image size, respecting the border setting. If the --border option is given in
 								addition to the --pagesize option while the --imgsize option is not given, then
 								the image size will be calculated from the page size, respecting the border
 								setting. If the --border option is given while both the --pagesize and
 								--imgsize options are passed, then the --border option will be ignored.
 								''' % default_dpi)
 								    sizeargs.add_argument(
 								            '-S', '--pagesize', metavar='LxL', type=parse_pagesize_rectarg,
 								            help='''
 								Sets the size of the PDF pages. The short-option is the upper case S because
 								it is an mnemonic for being bigger than the image size.''')
 								    sizeargs.add_argument(
 								            '-s', '--imgsize', metavar='LxL', type=parse_imgsize_rectarg,
 								            help='''
 								Sets the size of the images on the PDF pages.  In addition, the unit dpi is
 								allowed which will set the image size as a value of dots per inch.  Instead of
 								a unit, width and height values may also have a percentage sign appended,
 								indicating a resize of the image by that percentage. The short-option is the
 								lower case s because it is an mnemonic for being smaller than the page size.
 								''')
 								    sizeargs.add_argument(
 								            '-b', '--border', metavar='L[:L]', type=parse_borderarg,
 								            help='''
 								Specifies the minimal distance between the image border and the PDF page
 								border.  This value Is overwritten by explicit values set by --pagesize or
 								--imgsize.  The value will be used when calculating page dimensions from the
 								image dimensions or the other way round. One, or two length values can be given
 								as an argument, separated by a colon. One value specifies the minimal border on
 								all four sides. Two values specify the minimal border on the top/bottom and
 								left/right, respectively. It is not possible to specify asymmetric borders
 								because images will always be centered on the page.
 								''')
 								    sizeargs.add_argument(
 								            '-f', '--fit', metavar='FIT', type=parse_fitarg,
 								            default=FitMode.into, help='''
 								If --imgsize is given, fits the image using these dimensions. Otherwise, fit
 								the image into the dimensions given by --pagesize.  FIT is one of into, fill,
 								exact, shrink and enlarge. The default value is "into". See the epilogue at the
 								bottom for a description of the FIT options.
 								''')
 								    sizeargs.add_argument(
 								            '-a', '--auto-orient', action="store_true",
 								            help='''
 								If both dimensions of the page are given via --pagesize, conditionally swaps
 								these dimensions such that the page orientation is the same as the orientation
 								of the input image. If the orientation of a page gets flipped, then so do the
 								values set via the --border option.
 								''')
-												Make --help output more friendly to help2man

											
										
										
											2017-01-20 04:17:23 +00:00
+								    metaargs = parser.add_argument_group(
 								        title='Arguments setting metadata',
 								        description='Options handling embedded timestamps, title and author '
 								                    'information.')
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    metaargs.add_argument(
 								        '--title', metavar='title', type=str,
 								        help='Sets the title metadata value')
 								    metaargs.add_argument(
 								        '--author', metavar='author', type=str,
 								        help='Sets the author metadata value')
 								    metaargs.add_argument(
 								        '--creator', metavar='creator', type=str,
 								        help='Sets the creator metadata value')
 								    metaargs.add_argument(
-												Put version string in /Producer too

											
										
										
											2016-02-12 12:55:58 +00:00
+								        '--producer', metavar='producer', type=str,
 								        default="img2pdf " + __version__,
-												Fix default of --producer argument in --help output to include version

											
										
										
											2017-01-20 03:46:26 +00:00
+								        help='Sets the producer metadata value '
 								             '(default is: img2pdf ' + __version__ + ')')
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    metaargs.add_argument(
 								        '--creationdate', metavar='creationdate', type=valid_date,
 								        help='Sets the UTC creation date metadata value in YYYY-MM-DD or '
 								             'YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
 								             'understood by python dateutil module or any format understood '
 								             'by `date --date`')
 								    metaargs.add_argument(
 								        '--moddate', metavar='moddate', type=valid_date,
 								        help='Sets the UTC modification date metadata value in YYYY-MM-DD '
 								             'or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
 								             'understood by python dateutil module or any format understood '
 								             'by `date --date`')
 								    metaargs.add_argument(
 								        '--subject', metavar='subject', type=str,
 								        help='Sets the subject metadata value')
 								    metaargs.add_argument(
 								        '--keywords', metavar='kw', type=str, nargs='+',
 								        help='Sets the keywords metadata value (can be given multiple times)')
 								    viewerargs = parser.add_argument_group(
 								        title='PDF viewer arguments',
 								        description='PDF files can specify how they are meant to be '
 								                    'presented to the user by a PDF viewer')
 								    viewerargs.add_argument(
 								        '--viewer-panes', metavar="PANES", type=parse_panes,
 								        help='Instruct the PDF viewer which side panes to show. Valid values '
 								             'are "outlines" and "thumbs". It is not possible to specify both '
 								             'at the same time.')
 								    viewerargs.add_argument(
 								        '--viewer-initial-page', metavar="NUM", type=int,
 								        help='Instead of showing the first page, instruct the PDF viewer to '
 								             'show the given page instead. Page numbers start with 1.')
 								    viewerargs.add_argument(
 								        '--viewer-magnification', metavar="MAG", type=parse_magnification,
 								        help='Instruct the PDF viewer to open the PDF with a certain zoom '
 								             'level. Valid values are either a floating point number giving '
 								             'the exact zoom level, "fit" (zoom to fit whole page), "fith" '
 								             '(zoom to fit page width) and "fitbh" (zoom to fit visible page '
 								             'width).')
 								    viewerargs.add_argument(
 								        '--viewer-page-layout', metavar="LAYOUT", type=parse_layout,
 								        help='Instruct the PDF viewer how to arrange the pages on the screen. '
 								             'Valid values are "single" (display single pages), "onecolumn" '
 								             '(one continuous column), "twocolumnright" (two continuous '
 								             'columns with odd number pages on the right) and "twocolumnleft" '
 								             '(two continuous columns with odd numbered pages on the left)')
 								    viewerargs.add_argument(
 								        '--viewer-fit-window', action="store_true",
 								        help='Instruct the PDF viewer to resize the window to fit the page '
 								             'size')
 								    viewerargs.add_argument(
 								        '--viewer-center-window', action="store_true",
 								        help='Instruct the PDF viewer to center the PDF viewer window')
 								    viewerargs.add_argument(
 								        '--viewer-fullscreen', action="store_true",
 								        help='Instruct the PDF viewer to open the PDF in fullscreen mode')
 								    args = parser.parse_args()
 								    if args.verbose:
 								        logging.basicConfig(level=logging.DEBUG)
-												Add option --pillow-limit-break to force Pillow to accept large input images

											
										
										
											2018-03-24 18:47:03 +00:00
+								    if args.pillow_limit_break:
 								        Image.MAX_IMAGE_PIXELS = None
-												release version 0.2.0 - big rewrite and lots of new features

 - now Python3 only
 - pep8 compliant code
 - update my email to josch@mister-muffin.de
 - move from github to gitlab.mister-muffin.de/josch/img2pdf
 - use logging module
 - add extensive test suite
 - ability to read from standard input
 - pdf writer:
      - make more compatible with the interface of pdfrw module
      - print floats which equal to their integer conversion as integer
      - do not print trailing zeroes for floating point numbers
      - print more linebreaks
      - add binary string at beginning of PDF to indicate that the PDF
        contains binary data
      - handle datetime and unicode strings by using utf-16-be encoding
 - new options (see --help for more details):
      - --without-pdfrw
      - --imgsize
      - --border
      - --fit
      - --auto-orient
      - --viewer-panes
      - --viewer-initial-page
      - --viewer-magnification
      - --viewer-page-layout
      - --viewer-fit-window
      - --viewer-center-window
      - --viewer-fullscreen
 - remove short command line options for metadata arguments

											
										
										
											2015-05-10 12:35:09 +00:00
+								    layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border,
 								                                args.fit, args.auto_orient)
 								    # if no positional arguments were supplied, read a single image from
 								    # standard input
 								    if len(args.images) == 0:
 								        logging.info("reading image from standard input")
 								        try:
 								            args.images = [sys.stdin.buffer.read()]
 								        except KeyboardInterrupt:
 								            exit(0)
 								    # with the number of pages being equal to the number of images, the
 								    # value passed to --viewer-initial-page must be between 1 and that number
 								    if args.viewer_initial_page is not None:
 								        if args.viewer_initial_page < 1:
 								            parser.print_usage(file=sys.stderr)
 								            logging.error("%s: error: argument --viewer-initial-page: must be "
 								                          "greater than zero" % parser.prog)
 								            exit(2)
 								        if args.viewer_initial_page > len(args.images):
 								            parser.print_usage(file=sys.stderr)
 								            logging.error("%s: error: argument --viewer-initial-page: must be "
 								                          "less than or equal to the total number of pages" %
 								                          parser.prog)
 								            exit(2)
-												add options to specify pdf dimensions in points

add options specify output pdf dimensions in points:  -x width;  -y height.
											
										
										
											2014-08-04 15:25:07 +00:00
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								    try:
 								        convert(
 								            *args.images, title=args.title, author=args.author,
 								            creator=args.creator, producer=args.producer,
 								            creationdate=args.creationdate, moddate=args.moddate,
 								            subject=args.subject, keywords=args.keywords,
 								            colorspace=args.colorspace, nodate=args.nodate,
 								            layout_fun=layout_fun, viewer_panes=args.viewer_panes,
 								            viewer_initial_page=args.viewer_initial_page,
 								            viewer_magnification=args.viewer_magnification,
 								            viewer_page_layout=args.viewer_page_layout,
 								            viewer_fit_window=args.viewer_fit_window,
 								            viewer_center_window=args.viewer_center_window,
 								            viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not
-												support from multi-frame images like multipage TIFF and animated GIF

											
										
										
											2016-02-16 23:30:15 +00:00
+								            args.without_pdfrw, outputstream=args.output,
 								            first_frame_only=args.first_frame_only)
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								    except Exception as e:
 								        logging.error("error: " + str(e))
-												when verbose logging is enabled, print a traceback of possibly raised exceptions

											
										
										
											2016-02-14 17:55:27 +00:00
+								        if logging.getLogger().isEnabledFor(logging.DEBUG):
 								            import traceback
 								            traceback.print_exc(file=sys.stderr)
-												raise exceptions instead of doing an exit()

											
										
										
											2016-02-13 08:31:33 +00:00
+								        exit(1)
-												allow running src/img2pdf.py standalone

											
										
										
											2014-03-14 18:13:03 +00:00
-												Make pep8 compliant again

											
										
										
											2017-01-19 10:22:08 +00:00
-												allow running src/img2pdf.py standalone

											
										
										
											2014-03-14 18:13:03 +00:00
+								if __name__ == '__main__':
 								    main()