15 changed files with 1175 additions and 2826 deletions
--- a/.mailmap
+++ b/.mailmap
@ -1,3 +0,0 @@
-Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
-Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
-Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
--- a/CHANGES.rst
+++ b/CHANGES.rst
@ -2,51 +2,7 @@
 CHANGES
 =======

-0.6.0 (2025-02-15)
------------------
-
- - Add support for JBIG2 (generic coding)
- - Add convert_to_docobject() broken out from convert()
- - Add pil_get_dpi() broken out from get_imgmetadata()
-
-0.5.1 (2023-11-26)
------------------
-
- - no default ICC profile location for PDF/A-1b on Windows
- - workaround for PNG input without dpi units but non-square dpi aspect ratio
-
-0.5.0 (2023-10-28)
------------------
-
- - support MIFF for 16 bit CMYK input
- - accept pathlib.Path objects as input
- - don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
- - thumbnails are no longer included by default and --include-thumbnails has to
-   be used if you want them
- - support for pikepdf (>= 6.2.0)
-
-0.4.4 (2022-04-07)
------------------
-
- - --viewer-page-layout support for twopageright and twopageleft
- - Add B and JB paper sizes
- - support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
-
-0.4.3 (2021-10-24)
------------------
-
- - fix --viewer-initial-page (broken in last release)
-
-0.4.2 (2021-10-11)
------------------
-
- - add --rotation
- - allow palette PNG images with ICC profile
- - sort globbing result on windows
- - convert 8-bit PNG alpha channels to /SMasks in PDF
- - remove pdfrw from tests
-
-0.4.1 (2021-05-09)
+0.4.1 (2020-05-09)
 ------------------

 - support wildcards in paths on windows
--- a/43
+++ b/43
@ -27,45 +27,6 @@ Making a new release

 - Build and upload to pypi:

-    $ rm -rf dist/*
+    $ rm dist/*
    $ python3 setup.py sdist
-    $ twine upload dist/*
-
- - Push everything to git forge
-
-    $ git push
-
-Using debbisect to find regressions
-----------------------------------
-
-    $ debbisect --cache=./cache  --depends="git,ca-certificates,python3,
-           ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
-           python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
-      --verbose  2023-09-16 2023-10-24 \
-      'chroot "$1" sh -c "
-           git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
-           && cd img2pdf
-           && pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
-
-Using debbisect cache
---------------------
-
-    $ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
-          --include=git,ca-certificates,python3,ghostscript,imagemagick \
-          --include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
-          --include=python3-numpy,python3-scipy,python3-pikepdf \
-          --hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
-          --setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
-          --setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
-          --chrooted-customize-hook=bash \
-          unstable /dev/null \
-          file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
-
-Bisecting imagemagick
---------------------
-
-    $ git clean -fdx && git reset --hard
-    $ ./configure --prefix=$(pwd)/prefix
-    $ make -j$(nproc)
-    $ make install
-    $ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
+    $ twine upload --sign dist/*
--- a/README.md
+++ b/README.md
@ -28,19 +28,17 @@ The following table shows how img2pdf handles different input depending on the
 input file format and image color space.

 | Format               | Colorspace                     | Result        |
-| ------------------------------------- | ------------------------------------ | ------------- |
+| -------------------- | ------------------------------ | ------------- |
 | JPEG                 | any                            | direct        |
 | JPEG2000             | any                            | direct        |
-| PNG (non-interlaced, no transparency) | any                                  | direct        |
-| TIFF (CCITT Group 4)                  | 1-bit monochrome                     | direct        |
-| JBIG2 (single-page generic coding)    | 1-bit monochrome                     | direct        |
-| any                                   | any except CMYK and 1-bit monochrome | PNG Paeth     |
-| any                                   | 1-bit monochrome                     | CCITT Group 4 |
+| PNG (non-interlaced) | any                            | direct        |
+| TIFF (CCITT Group 4) | monochrome                     | direct        |
+| any                  | any except CMYK and monochrome | PNG Paeth     |
+| any                  | monochrome                     | CCITT Group 4 |
 | any                  | CMYK                           | flate         |

-For JPEG, JPEG2000, non-interlaced PNG, TIFF images with CCITT Group 4
-encoded data, and JBIG2 with single-page generic coding (e.g. using `jbig2enc`),
-img2pdf directly embeds the image data into the PDF without
+For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
+encoded data, img2pdf directly embeds the image data into the PDF without
 re-encoding it. It thus treats the PDF format merely as a container format for
 the image data. In these cases, img2pdf only increases the filesize by the size
 of the PDF container (typically around 500 to 700 bytes). Since data is only
@ -49,7 +47,7 @@ solutions for these input formats.

 For all other input types, img2pdf first has to transform the pixel data to
 make it compatible with PDF. In most cases, the PNG Paeth filter is applied to
-the pixel data. For 1-bit monochrome input, CCITT Group 4 is used instead. Only for
+the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for
 CMYK input no filter is applied before finally applying flate compression.

 Usage
@ -67,12 +65,6 @@ The detailed documentation can be accessed by running:

 	$ img2pdf --help

-With no command line arguments supplied, img2pdf will read a single image from
-standard input and write the resulting PDF to standard output. Here is an
-example for how to scan directly to PDF using scanimage(1) from SANE:
-
-	$ scanimage --mode=Color --resolution=300 | pnmtojpeg -quality 90 | img2pdf > scan.pdf
-
 Bugs
 ----

@ -80,15 +72,25 @@ Bugs
   when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
   please contact me.

+ - I have not yet figured out how to determine the colorspace of JPEG2000
+   files.  Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
+   files with other colorspaces, you must explicitly specify it using the
+   `--colorspace` option.
+
+ - Input images with alpha channels are not allowed. PDF only supports
+   transparency using binary masks but is unable to store 8-bit transparency
+   information as part of the image itself. But img2pdf will always be lossless
+   and thus, input images must not carry transparency information. You can
+   remove the alpha channel for example with imagemagick:
+
+    convert input.png -background white -alpha remove -alpha off output.png
+
 - An error is produced if the input image is broken. This commonly happens if
   the input image has an invalid EXIF Orientation value of zero. Even though
   only eight different values from 1 to 8 are permitted, Android phones and
   Canon DSLR cameras produce JPEG images with the invalid value of zero.
   Either fix your input images with `exiftool` or similar software before
-   passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`
-   (if you run img2pdf from the commandline) or by passing
-   `rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using
-   img2pdf as a library.
+   passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`.

 - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
   input if necessary. To prevent decompression bomb denial of service attacks,
@ -125,9 +127,10 @@ You can then test the converter using:

 	$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg

-If you don't want to setup Python on Windows, then head to the
-[releases](/josch/img2pdf/releases) section and download the latest
-`img2pdf.exe`.
+For Microsoft Windows users, PyInstaller based .exe files are produced by
+appveyor. If you don't want to install Python before using img2pdf you can head
+to appveyor and click on "Artifacts" to download the latest version:
+https://ci.appveyor.com/project/josch/img2pdf

 GUI
 ---
@ -154,10 +157,6 @@ The package can also be used as a library:
 	with open("name.pdf","wb") as f1, open("test.jpg") as f2:
 		f1.write(img2pdf.convert(f2))

-	# opening using pathlib
-	with open("name.pdf","wb") as f:
-		f.write(img2pdf.convert(pathlib.Path('test.jpg')))
-
 	# using in-memory image data
 	with open("name.pdf","wb") as f:
 		f.write(img2pdf.convert("\x89PNG..."))
@ -200,15 +199,6 @@ The package can also be used as a library:
 	with open("name.pdf","wb") as f:
 		f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))

-	# convert all files matching a glob using pathlib.Path
-	from pathlib import Path
-	with open("name.pdf","wb") as f:
-		f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
-
-	# ignore invalid rotation values in the input images
-	with open("name.pdf","wb") as f:
-		f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid))
-
 	# writing to file descriptor
 	with open("name.pdf","wb") as f1, open("test.jpg") as f2:
 		img2pdf.convert(f2, outputstream=f1)
@ -316,14 +306,3 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
 input to RGB and removes the alpha channel from images with transparency. For
 multipage TIFF or animated GIF, it will only convert the first frame.

-Comparison to econvert from ExactImage
--------------------------------------
-
-Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
-directly without re-encoding but when given other file formats, it stores them
-just using flate compression, which unnecessarily increases the filesize.
-Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
-jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
-to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
-also seems to choose JPEG encoding when using it on some file types (like
-palette images) making it again not lossless for that input as well.
--- a/appveyor.yml
+++ b/appveyor.yml
@ -16,18 +16,16 @@ environment:
    - PYTHON: "C:\\Python37-x64"

 install:
-  - "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow"
+  - "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller"

 build: off

-# don't run tests on windows because we don't have imagemagick
-#test_script:
-#  - "%PYTHON%\\python.exe -m tox"
+test_script:
+  - "%PYTHON%\\python.exe -m tox"

 after_test:
  - "%PYTHON%\\python.exe setup.py bdist_wheel"
-  - "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
-  #- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
+  - "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"

 artifacts:
  - path: dist\*
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,2 @@
+[metadata]
+description-file = README.md
--- a/setup.py
+++ b/setup.py
@ -1,7 +1,7 @@
 import sys
 from setuptools import setup

-VERSION = "0.6.0"
+VERSION = "0.4.1"

 INSTALL_REQUIRES = (
    "Pillow",
@ -11,7 +11,7 @@ INSTALL_REQUIRES = (
 setup(
    name="img2pdf",
    version=VERSION,
-    author="Johannes Schauer Marin Rodrigues",
+    author="Johannes 'josch' Schauer",
    author_email="josch@mister-muffin.de",
    description="Convert images to PDF via direct JPEG inclusion.",
    long_description=open("README.md").read(),
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
--- a/src/jp2.py
+++ b/src/jp2.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright (C) 2013 Johannes Schauer Marin Rodrigues <j.schauer at email.de>
+# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
 #
 # this module is heavily based upon jpylyzer which is
 # KB / National Library of the Netherlands, Open Planets Foundation
@ -37,8 +37,9 @@ def getBox(data, byteStart, noBytes):


 def parse_ihdr(data):
-    height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
-    return width, height, channels, bpp + 1
+    height = struct.unpack(">I", data[0:4])[0]
+    width = struct.unpack(">I", data[4:8])[0]
+    return width, height


 def parse_colr(data):
@ -84,13 +85,13 @@ def parse_jp2h(data):
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
        if boxType == b"ihdr":
-            width, height, channels, bpp = parse_ihdr(boxContents)
+            width, height = parse_ihdr(boxContents)
        elif boxType == b"colr":
            colorspace = parse_colr(boxContents)
        elif boxType == b"res ":
            hdpi, vdpi = parse_res(boxContents)
        byteStart = byteEnd
-    return (width, height, colorspace, hdpi, vdpi, channels, bpp)
+    return (width, height, colorspace, hdpi, vdpi)


 def parsejp2(data):
@ -101,9 +102,7 @@ def parsejp2(data):
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
        if boxType == b"jp2h":
-            width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
-                boxContents
-            )
+            width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
            break
        byteStart = byteEnd
    if not width:
@ -113,41 +112,13 @@ def parsejp2(data):
    if not colorspace:
        raise Exception("no colorspace in jp2 header")
    # retrieving the dpi is optional so we do not error out if not present
-    return (width, height, colorspace, hdpi, vdpi, channels, bpp)
-
-
-def parsej2k(data):
-    lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
-        ">HHIIIIIIIIH", data[4:42]
-    )
-    ssiz = [None] * csiz
-    xrsiz = [None] * csiz
-    yrsiz = [None] * csiz
-    for i in range(csiz):
-        ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack(
-            "BBB", data[42 + 3 * i : 42 + 3 * (i + 1)]
-        )
-    assert ssiz == [7, 7, 7]
-    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
-
-
-def parse(data):
-    if data[:4] == b"\xff\x4f\xff\x51":
-        return parsej2k(data)
-    else:
-        return parsejp2(data)
+    return (width, height, colorspace, hdpi, vdpi)


 if __name__ == "__main__":
    import sys

-    width, height, colorspace, hdpi, vdpi, channels, bpp = parse(
-        open(sys.argv[1], "rb").read()
-    )
-    print("width = %d" % width)
-    print("height = %d" % height)
-    print("colorspace = %s" % colorspace)
-    print("hdpi = %s" % hdpi)
-    print("vdpi = %s" % vdpi)
-    print("channels = %s" % channels)
-    print("bpp = %s" % bpp)
+    width, height, colorspace = parsejp2(open(sys.argv[1]).read())
+    sys.stdout.write("width = %d" % width)
+    sys.stdout.write("height = %d" % height)
+    sys.stdout.write("colorspace = %s" % colorspace)
--- a/src/tests/input/animation.gif
+++ b/src/tests/input/animation.gif
--- a/src/tests/input/mono.jb2
+++ b/src/tests/input/mono.jb2
--- a/src/tests/output/animation.gif.pdf
+++ b/src/tests/output/animation.gif.pdf
--- a/src/tests/output/mono.jb2.pdf
+++ b/src/tests/output/mono.jb2.pdf
--- a/tox.ini
+++ b/tox.ini
@ -4,7 +4,7 @@
 # and then run "tox" from this directory.

 [tox]
-envlist = py37, py38, py39, py310
+envlist = py35, py36, py37, py38, py39
 skip_missing_interpreters = true

 [testenv]