Compare commits
No commits in common. "main" and "im7" have entirely different histories.
13 changed files with 1100 additions and 2541 deletions
3
.mailmap
3
.mailmap
|
@ -1,3 +0,0 @@
|
||||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
|
||||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
|
||||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
|
39
CHANGES.rst
39
CHANGES.rst
|
@ -2,44 +2,7 @@
|
||||||
CHANGES
|
CHANGES
|
||||||
=======
|
=======
|
||||||
|
|
||||||
0.5.1 (2023-11-26)
|
0.4.1 (2020-05-09)
|
||||||
------------------
|
|
||||||
|
|
||||||
- no default ICC profile location for PDF/A-1b on Windows
|
|
||||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
|
||||||
|
|
||||||
0.5.0 (2023-10-28)
|
|
||||||
------------------
|
|
||||||
|
|
||||||
- support MIFF for 16 bit CMYK input
|
|
||||||
- accept pathlib.Path objects as input
|
|
||||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
|
||||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
|
||||||
be used if you want them
|
|
||||||
- support for pikepdf (>= 6.2.0)
|
|
||||||
|
|
||||||
0.4.4 (2022-04-07)
|
|
||||||
------------------
|
|
||||||
|
|
||||||
- --viewer-page-layout support for twopageright and twopageleft
|
|
||||||
- Add B and JB paper sizes
|
|
||||||
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
|
||||||
|
|
||||||
0.4.3 (2021-10-24)
|
|
||||||
------------------
|
|
||||||
|
|
||||||
- fix --viewer-initial-page (broken in last release)
|
|
||||||
|
|
||||||
0.4.2 (2021-10-11)
|
|
||||||
------------------
|
|
||||||
|
|
||||||
- add --rotation
|
|
||||||
- allow palette PNG images with ICC profile
|
|
||||||
- sort globbing result on windows
|
|
||||||
- convert 8-bit PNG alpha channels to /SMasks in PDF
|
|
||||||
- remove pdfrw from tests
|
|
||||||
|
|
||||||
0.4.1 (2021-05-09)
|
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
- support wildcards in paths on windows
|
- support wildcards in paths on windows
|
||||||
|
|
39
HACKING
39
HACKING
|
@ -27,41 +27,6 @@ Making a new release
|
||||||
|
|
||||||
- Build and upload to pypi:
|
- Build and upload to pypi:
|
||||||
|
|
||||||
$ rm -rf dist/*
|
$ rm dist/*
|
||||||
$ python3 setup.py sdist
|
$ python3 setup.py sdist
|
||||||
$ twine upload dist/*
|
$ twine upload --sign dist/*
|
||||||
|
|
||||||
Using debbisect to find regressions
|
|
||||||
-----------------------------------
|
|
||||||
|
|
||||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
|
||||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
|
||||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
|
||||||
--verbose 2023-09-16 2023-10-24 \
|
|
||||||
'chroot "$1" sh -c "
|
|
||||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
|
||||||
&& cd img2pdf
|
|
||||||
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
|
||||||
|
|
||||||
Using debbisect cache
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
|
||||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
|
||||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
|
||||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
|
||||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
|
||||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
|
||||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
|
||||||
--chrooted-customize-hook=bash
|
|
||||||
unstable /dev/null
|
|
||||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
|
||||||
|
|
||||||
Bisecting imagemagick
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
$ git clean -fdx && git reset --hard
|
|
||||||
$ ./configure --prefix=$(pwd)/prefix
|
|
||||||
$ make -j$(nproc)
|
|
||||||
$ make install
|
|
||||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
|
||||||
|
|
67
README.md
67
README.md
|
@ -27,15 +27,15 @@ software, because the raw pixel data never has to be loaded into memory.
|
||||||
The following table shows how img2pdf handles different input depending on the
|
The following table shows how img2pdf handles different input depending on the
|
||||||
input file format and image color space.
|
input file format and image color space.
|
||||||
|
|
||||||
| Format | Colorspace | Result |
|
| Format | Colorspace | Result |
|
||||||
| ------------------------------------- | ------------------------------ | ------------- |
|
| -------------------- | ------------------------------ | ------------- |
|
||||||
| JPEG | any | direct |
|
| JPEG | any | direct |
|
||||||
| JPEG2000 | any | direct |
|
| JPEG2000 | any | direct |
|
||||||
| PNG (non-interlaced, no transparency) | any | direct |
|
| PNG (non-interlaced) | any | direct |
|
||||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||||
| any | any except CMYK and monochrome | PNG Paeth |
|
| any | any except CMYK and monochrome | PNG Paeth |
|
||||||
| any | monochrome | CCITT Group 4 |
|
| any | monochrome | CCITT Group 4 |
|
||||||
| any | CMYK | flate |
|
| any | CMYK | flate |
|
||||||
|
|
||||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||||
|
@ -72,15 +72,25 @@ Bugs
|
||||||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||||
please contact me.
|
please contact me.
|
||||||
|
|
||||||
|
- I have not yet figured out how to determine the colorspace of JPEG2000
|
||||||
|
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
||||||
|
files with other colorspaces, you must explicitly specify it using the
|
||||||
|
`--colorspace` option.
|
||||||
|
|
||||||
|
- Input images with alpha channels are not allowed. PDF only supports
|
||||||
|
transparency using binary masks but is unable to store 8-bit transparency
|
||||||
|
information as part of the image itself. But img2pdf will always be lossless
|
||||||
|
and thus, input images must not carry transparency information. You can
|
||||||
|
remove the alpha channel for example with imagemagick:
|
||||||
|
|
||||||
|
convert input.png -background white -alpha remove -alpha off output.png
|
||||||
|
|
||||||
- An error is produced if the input image is broken. This commonly happens if
|
- An error is produced if the input image is broken. This commonly happens if
|
||||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||||
only nine different values from 1 to 9 are permitted, Android phones and
|
only nine different values from 1 to 9 are permitted, Android phones and
|
||||||
Canon DSLR cameras produce JPEG images with the invalid value of zero.
|
Canon DSLR cameras produce JPEG images with the invalid value of zero.
|
||||||
Either fix your input images with `exiftool` or similar software before
|
Either fix your input images with `exiftool` or similar software before
|
||||||
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`
|
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`.
|
||||||
(if you run img2pdf from the commandline) or by passing
|
|
||||||
`rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using
|
|
||||||
img2pdf as a library.
|
|
||||||
|
|
||||||
- img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
|
- img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
|
||||||
input if necessary. To prevent decompression bomb denial of service attacks,
|
input if necessary. To prevent decompression bomb denial of service attacks,
|
||||||
|
@ -117,9 +127,10 @@ You can then test the converter using:
|
||||||
|
|
||||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||||
|
|
||||||
If you don't want to setup Python on Windows, then head to the
|
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
||||||
[releases](/josch/img2pdf/releases) section and download the latest
|
appveyor. If you don't want to install Python before using img2pdf you can head
|
||||||
`img2pdf.exe`.
|
to appveyor and click on "Artifacts" to download the latest version:
|
||||||
|
https://ci.appveyor.com/project/josch/img2pdf
|
||||||
|
|
||||||
GUI
|
GUI
|
||||||
---
|
---
|
||||||
|
@ -146,10 +157,6 @@ The package can also be used as a library:
|
||||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||||
f1.write(img2pdf.convert(f2))
|
f1.write(img2pdf.convert(f2))
|
||||||
|
|
||||||
# opening using pathlib
|
|
||||||
with open("name.pdf","wb") as f:
|
|
||||||
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
|
||||||
|
|
||||||
# using in-memory image data
|
# using in-memory image data
|
||||||
with open("name.pdf","wb") as f:
|
with open("name.pdf","wb") as f:
|
||||||
f.write(img2pdf.convert("\x89PNG...")
|
f.write(img2pdf.convert("\x89PNG...")
|
||||||
|
@ -192,15 +199,6 @@ The package can also be used as a library:
|
||||||
with open("name.pdf","wb") as f:
|
with open("name.pdf","wb") as f:
|
||||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||||
|
|
||||||
# convert all files matching a glob using pathlib.Path
|
|
||||||
from pathlib import Path
|
|
||||||
with open("name.pdf","wb") as f:
|
|
||||||
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
|
||||||
|
|
||||||
# ignore invalid rotation values in the input images
|
|
||||||
with open("name.pdf","wb") as f:
|
|
||||||
f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid))
|
|
||||||
|
|
||||||
# writing to file descriptor
|
# writing to file descriptor
|
||||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||||
img2pdf.convert(f2, outputstream=f1)
|
img2pdf.convert(f2, outputstream=f1)
|
||||||
|
@ -308,14 +306,3 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
||||||
input to RGB and removes the alpha channel from images with transparency. For
|
input to RGB and removes the alpha channel from images with transparency. For
|
||||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||||
|
|
||||||
Comparison to econvert from ExactImage
|
|
||||||
--------------------------------------
|
|
||||||
|
|
||||||
Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
|
|
||||||
directly without re-encoding but when given other file formats, it stores them
|
|
||||||
just using flate compression, which unnecessarily increases the filesize.
|
|
||||||
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
|
||||||
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
|
||||||
to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
|
|
||||||
also seems to choose JPEG encoding when using it on some file types (like
|
|
||||||
palette images) making it again not lossless for that input as well.
|
|
||||||
|
|
10
appveyor.yml
10
appveyor.yml
|
@ -16,18 +16,16 @@ environment:
|
||||||
- PYTHON: "C:\\Python37-x64"
|
- PYTHON: "C:\\Python37-x64"
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow"
|
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller"
|
||||||
|
|
||||||
build: off
|
build: off
|
||||||
|
|
||||||
# don't run tests on windows because we don't have imagemagick
|
test_script:
|
||||||
#test_script:
|
- "%PYTHON%\\python.exe -m tox"
|
||||||
# - "%PYTHON%\\python.exe -m tox"
|
|
||||||
|
|
||||||
after_test:
|
after_test:
|
||||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
||||||
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
|
||||||
|
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: dist\*
|
- path: dist\*
|
||||||
|
|
2
setup.cfg
Normal file
2
setup.cfg
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[metadata]
|
||||||
|
description-file = README.md
|
4
setup.py
4
setup.py
|
@ -1,7 +1,7 @@
|
||||||
import sys
|
import sys
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
VERSION = "0.5.1"
|
VERSION = "0.4.1"
|
||||||
|
|
||||||
INSTALL_REQUIRES = (
|
INSTALL_REQUIRES = (
|
||||||
"Pillow",
|
"Pillow",
|
||||||
|
@ -11,7 +11,7 @@ INSTALL_REQUIRES = (
|
||||||
setup(
|
setup(
|
||||||
name="img2pdf",
|
name="img2pdf",
|
||||||
version=VERSION,
|
version=VERSION,
|
||||||
author="Johannes Schauer Marin Rodrigues",
|
author="Johannes 'josch' Schauer",
|
||||||
author_email="josch@mister-muffin.de",
|
author_email="josch@mister-muffin.de",
|
||||||
description="Convert images to PDF via direct JPEG inclusion.",
|
description="Convert images to PDF via direct JPEG inclusion.",
|
||||||
long_description=open("README.md").read(),
|
long_description=open("README.md").read(),
|
||||||
|
|
895
src/img2pdf.py
895
src/img2pdf.py
File diff suppressed because it is too large
Load diff
2523
src/img2pdf_test.py
2523
src/img2pdf_test.py
File diff suppressed because it is too large
Load diff
57
src/jp2.py
57
src/jp2.py
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
#
|
#
|
||||||
# Copyright (C) 2013 Johannes Schauer Marin Rodrigues <j.schauer at email.de>
|
# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
|
||||||
#
|
#
|
||||||
# this module is heavily based upon jpylyzer which is
|
# this module is heavily based upon jpylyzer which is
|
||||||
# KB / National Library of the Netherlands, Open Planets Foundation
|
# KB / National Library of the Netherlands, Open Planets Foundation
|
||||||
|
@ -37,8 +37,9 @@ def getBox(data, byteStart, noBytes):
|
||||||
|
|
||||||
|
|
||||||
def parse_ihdr(data):
|
def parse_ihdr(data):
|
||||||
height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
|
height = struct.unpack(">I", data[0:4])[0]
|
||||||
return width, height, channels, bpp + 1
|
width = struct.unpack(">I", data[4:8])[0]
|
||||||
|
return width, height
|
||||||
|
|
||||||
|
|
||||||
def parse_colr(data):
|
def parse_colr(data):
|
||||||
|
@ -58,8 +59,8 @@ def parse_colr(data):
|
||||||
|
|
||||||
def parse_resc(data):
|
def parse_resc(data):
|
||||||
hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
|
hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
|
||||||
hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
|
hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
|
||||||
vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
|
vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
|
||||||
return hdpi, vdpi
|
return hdpi, vdpi
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,13 +85,13 @@ def parse_jp2h(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"ihdr":
|
if boxType == b"ihdr":
|
||||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
width, height = parse_ihdr(boxContents)
|
||||||
elif boxType == b"colr":
|
elif boxType == b"colr":
|
||||||
colorspace = parse_colr(boxContents)
|
colorspace = parse_colr(boxContents)
|
||||||
elif boxType == b"res ":
|
elif boxType == b"res ":
|
||||||
hdpi, vdpi = parse_res(boxContents)
|
hdpi, vdpi = parse_res(boxContents)
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
return (width, height, colorspace, hdpi, vdpi)
|
||||||
|
|
||||||
|
|
||||||
def parsejp2(data):
|
def parsejp2(data):
|
||||||
|
@ -101,9 +102,7 @@ def parsejp2(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"jp2h":
|
if boxType == b"jp2h":
|
||||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||||
boxContents
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
if not width:
|
if not width:
|
||||||
|
@ -113,41 +112,13 @@ def parsejp2(data):
|
||||||
if not colorspace:
|
if not colorspace:
|
||||||
raise Exception("no colorspace in jp2 header")
|
raise Exception("no colorspace in jp2 header")
|
||||||
# retrieving the dpi is optional so we do not error out if not present
|
# retrieving the dpi is optional so we do not error out if not present
|
||||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
return (width, height, colorspace, hdpi, vdpi)
|
||||||
|
|
||||||
|
|
||||||
def parsej2k(data):
|
|
||||||
lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
|
|
||||||
">HHIIIIIIIIH", data[4:42]
|
|
||||||
)
|
|
||||||
ssiz = [None] * csiz
|
|
||||||
xrsiz = [None] * csiz
|
|
||||||
yrsiz = [None] * csiz
|
|
||||||
for i in range(csiz):
|
|
||||||
ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack(
|
|
||||||
"BBB", data[42 + 3 * i : 42 + 3 * (i + 1)]
|
|
||||||
)
|
|
||||||
assert ssiz == [7, 7, 7]
|
|
||||||
return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
|
||||||
|
|
||||||
|
|
||||||
def parse(data):
|
|
||||||
if data[:4] == b"\xff\x4f\xff\x51":
|
|
||||||
return parsej2k(data)
|
|
||||||
else:
|
|
||||||
return parsejp2(data)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse(
|
width, height, colorspace = parsejp2(open(sys.argv[1]).read())
|
||||||
open(sys.argv[1], "rb").read()
|
sys.stdout.write("width = %d" % width)
|
||||||
)
|
sys.stdout.write("height = %d" % height)
|
||||||
print("width = %d" % width)
|
sys.stdout.write("colorspace = %s" % colorspace)
|
||||||
print("height = %d" % height)
|
|
||||||
print("colorspace = %s" % colorspace)
|
|
||||||
print("hdpi = %s" % hdpi)
|
|
||||||
print("vdpi = %s" % vdpi)
|
|
||||||
print("channels = %s" % channels)
|
|
||||||
print("bpp = %s" % bpp)
|
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
Binary file not shown.
2
tox.ini
2
tox.ini
|
@ -4,7 +4,7 @@
|
||||||
# and then run "tox" from this directory.
|
# and then run "tox" from this directory.
|
||||||
|
|
||||||
[tox]
|
[tox]
|
||||||
envlist = py37, py38, py39, py310
|
envlist = py35, py36, py37, py38, py39
|
||||||
skip_missing_interpreters = true
|
skip_missing_interpreters = true
|
||||||
|
|
||||||
[testenv]
|
[testenv]
|
||||||
|
|
Loading…
Reference in a new issue