forked from josch/img2pdf
Compare commits
91 commits
tests-no-d
...
main
Author | SHA1 | Date | |
---|---|---|---|
819b366bf5 | |||
cc8c708295 | |||
fb9537d8b7 | |||
7678435eb7 | |||
ba7a360866 | |||
7f0bf47ff3 | |||
|
5cd0918d50 | ||
|
f157ced05d | ||
09064e8e70 | |||
2f736d7891 | |||
e05580a49a | |||
acc25a4926 | |||
f597887088 | |||
3e832fbcc2 | |||
1e8557cef1 | |||
29921eeabd | |||
33139612f8 | |||
64d27f4a8b | |||
85cbe1d128 | |||
b25429a4c1 | |||
c703e9df06 | |||
79e9985f35 | |||
cb2644c34f | |||
81502f21af | |||
0cbcb8fa12 | |||
e9e04b6dd9 | |||
fc059ee471 | |||
25466113e9 | |||
7405635b72 | |||
aea472101b | |||
7fa67bb337 | |||
7d40569aa1 | |||
83f9c32328 | |||
be8369373f | |||
10c6901fa3 | |||
57d7e07e6b | |||
272fe0433f | |||
ef7b9e739d | |||
af6fe27d53 | |||
bad6fcae39 | |||
d9b90499f3 | |||
edb0d29a14 | |||
bb3e8b0098 | |||
f454ebc6a6 | |||
c3db273e23 | |||
87afabd3cf | |||
|
5045282cc2 | ||
fb4b96452a | |||
c553e169a4 | |||
d9345ac767 | |||
1d52530229 | |||
3b117e674b | |||
e8ca53738f | |||
7c48bfb868 | |||
244f034a2e | |||
3da370d3bd | |||
6cff2931e4 | |||
6a55258804 | |||
3cdeab08ab | |||
cea7c9120b | |||
9eacfdaa76 | |||
95a313f437 | |||
30d705f020 | |||
dc926b2cf2 | |||
a8fdbd0038 | |||
6ff175d637 | |||
0732dff0be | |||
50b7145f64 | |||
e522ec14d9 | |||
9c9e5ece19 | |||
354fd7c264 | |||
392d4a665e | |||
09ad147d97 | |||
80393b6efa | |||
e265738ac2 | |||
1ffb160453 | |||
cde7472d15 | |||
6eec05c11c | |||
|
f483638b17 | ||
|
7f216a8848 | ||
|
2476215f39 | ||
|
f62858c245 | ||
|
a5e4da5755 | ||
|
64db7909ec | ||
|
af5ae5b9b6 | ||
d03f331521 | |||
635b08c321 | |||
152f6fb629 | |||
1f3b456ac9 | |||
4c5b72dab0 | |||
853a1ec363 |
14 changed files with 2126 additions and 704 deletions
3
.mailmap
Normal file
3
.mailmap
Normal file
|
@ -0,0 +1,3 @@
|
|||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
|
@ -13,7 +13,6 @@ matrix:
|
|||
- netpbm
|
||||
- ghostscript
|
||||
- mupdf-tools
|
||||
- icc-profiles-free
|
||||
- name: "python 3.9 Windows"
|
||||
os: windows
|
||||
language: shell # 'language: python' is an error on Travis CI Windows
|
||||
|
|
39
CHANGES.rst
39
CHANGES.rst
|
@ -2,7 +2,44 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
0.4.1 (2020-05-09)
|
||||
0.5.1 (2023-11-26)
|
||||
------------------
|
||||
|
||||
- no default ICC profile location for PDF/A-1b on Windows
|
||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||
|
||||
0.5.0 (2023-10-28)
|
||||
------------------
|
||||
|
||||
- support MIFF for 16 bit CMYK input
|
||||
- accept pathlib.Path objects as input
|
||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||
be used if you want them
|
||||
- support for pikepdf (>= 6.2.0)
|
||||
|
||||
0.4.4 (2022-04-07)
|
||||
------------------
|
||||
|
||||
- --viewer-page-layout support for twopageright and twopageleft
|
||||
- Add B and JB paper sizes
|
||||
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
||||
|
||||
0.4.3 (2021-10-24)
|
||||
------------------
|
||||
|
||||
- fix --viewer-initial-page (broken in last release)
|
||||
|
||||
0.4.2 (2021-10-11)
|
||||
------------------
|
||||
|
||||
- add --rotation
|
||||
- allow palette PNG images with ICC profile
|
||||
- sort globbing result on windows
|
||||
- convert 8-bit PNG alpha channels to /SMasks in PDF
|
||||
- remove pdfrw from tests
|
||||
|
||||
0.4.1 (2021-05-09)
|
||||
------------------
|
||||
|
||||
- support wildcards in paths on windows
|
||||
|
|
39
HACKING
39
HACKING
|
@ -27,6 +27,41 @@ Making a new release
|
|||
|
||||
- Build and upload to pypi:
|
||||
|
||||
$ rm dist/*
|
||||
$ rm -rf dist/*
|
||||
$ python3 setup.py sdist
|
||||
$ twine upload --sign dist/*
|
||||
$ twine upload dist/*
|
||||
|
||||
Using debbisect to find regressions
|
||||
-----------------------------------
|
||||
|
||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||
--verbose 2023-09-16 2023-10-24 \
|
||||
'chroot "$1" sh -c "
|
||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||
&& cd img2pdf
|
||||
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||
|
||||
Using debbisect cache
|
||||
---------------------
|
||||
|
||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--chrooted-customize-hook=bash
|
||||
unstable /dev/null
|
||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||
|
||||
Bisecting imagemagick
|
||||
---------------------
|
||||
|
||||
$ git clean -fdx && git reset --hard
|
||||
$ ./configure --prefix=$(pwd)/prefix
|
||||
$ make -j$(nproc)
|
||||
$ make install
|
||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||
|
|
71
README.md
71
README.md
|
@ -1,5 +1,5 @@
|
|||
[![Travis Status](https://travis-ci.org/josch/img2pdf.svg?branch=master)](https://travis-ci.org/josch/img2pdf)
|
||||
[![Appveyor Status](https://ci.appveyor.com/api/projects/status/2kws3wkqvi526llj/branch/master?svg=true)](https://ci.appveyor.com/project/josch/img2pdf/branch/master)
|
||||
[![Travis Status](https://travis-ci.com/josch/img2pdf.svg?branch=main)](https://app.travis-ci.com/josch/img2pdf)
|
||||
[![Appveyor Status](https://ci.appveyor.com/api/projects/status/2kws3wkqvi526llj/branch/main?svg=true)](https://ci.appveyor.com/project/josch/img2pdf/branch/main)
|
||||
|
||||
img2pdf
|
||||
=======
|
||||
|
@ -27,15 +27,15 @@ software, because the raw pixel data never has to be loaded into memory.
|
|||
The following table shows how img2pdf handles different input depending on the
|
||||
input file format and image color space.
|
||||
|
||||
| Format | Colorspace | Result |
|
||||
| -------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
| Format | Colorspace | Result |
|
||||
| ------------------------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced, no transparency) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
|
||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||
|
@ -72,25 +72,15 @@ Bugs
|
|||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||
please contact me.
|
||||
|
||||
- I have not yet figured out how to determine the colorspace of JPEG2000
|
||||
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
||||
files with other colorspaces, you must explicitly specify it using the
|
||||
`--colorspace` option.
|
||||
|
||||
- Input images with alpha channels are not allowed. PDF only supports
|
||||
transparency using binary masks but is unable to store 8-bit transparency
|
||||
information as part of the image itself. But img2pdf will always be lossless
|
||||
and thus, input images must not carry transparency information. You can
|
||||
remove the alpha channel for example with imagemagick:
|
||||
|
||||
convert input.png -background white -alpha remove -alpha off output.png
|
||||
|
||||
- An error is produced if the input image is broken. This commonly happens if
|
||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||
only nine different values from 1 to 9 are permitted, Anroid phones and
|
||||
Canon DSLR cameras produce JPEG images with the invalid value of zero.
|
||||
Either fix your input images with `exiftool` or similar software before
|
||||
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`.
|
||||
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`
|
||||
(if you run img2pdf from the commandline) or by passing
|
||||
`rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using
|
||||
img2pdf as a library.
|
||||
|
||||
- img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
|
||||
input if necessary. To prevent decompression bomb denial of service attacks,
|
||||
|
@ -127,10 +117,9 @@ You can then test the converter using:
|
|||
|
||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||
|
||||
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
||||
appveyor. If you don't want to install Python before using img2pdf you can head
|
||||
to appveyor and click on "Artifacts" to download the latest version:
|
||||
https://ci.appveyor.com/project/josch/img2pdf
|
||||
If you don't want to setup Python on Windows, then head to the
|
||||
[releases](/josch/img2pdf/releases) section and download the latest
|
||||
`img2pdf.exe`.
|
||||
|
||||
GUI
|
||||
---
|
||||
|
@ -157,6 +146,10 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
f1.write(img2pdf.convert(f2))
|
||||
|
||||
# opening using pathlib
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
||||
|
||||
# using in-memory image data
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert("\x89PNG...")
|
||||
|
@ -199,6 +192,15 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||
|
||||
# convert all files matching a glob using pathlib.Path
|
||||
from pathlib import Path
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
||||
|
||||
# ignore invalid rotation values in the input images
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
|
||||
|
||||
# writing to file descriptor
|
||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
img2pdf.convert(f2, outputstream=f1)
|
||||
|
@ -306,3 +308,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
|||
input to RGB and removes the alpha channel from images with transparency. For
|
||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||
|
||||
Comparison to econvert from ExactImage
|
||||
--------------------------------------
|
||||
|
||||
Like pdflatex and podofoimg2pf, econvert is able to embed JPEG images into PDF
|
||||
directly without re-encoding but when given other file formats, it stores them
|
||||
just using flate compressen, which unnecessarily increases the filesize.
|
||||
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
||||
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
||||
to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
|
||||
also seems to choose JPEG encoding when using it on some file types (like
|
||||
palette images) making it again not lossless for that input as well.
|
||||
|
|
10
appveyor.yml
10
appveyor.yml
|
@ -16,16 +16,18 @@ environment:
|
|||
- PYTHON: "C:\\Python37-x64"
|
||||
|
||||
install:
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller"
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow"
|
||||
|
||||
build: off
|
||||
|
||||
test_script:
|
||||
- "%PYTHON%\\python.exe -m tox"
|
||||
# don't run tests on windows because we don't have imagemagick
|
||||
#test_script:
|
||||
# - "%PYTHON%\\python.exe -m tox"
|
||||
|
||||
after_test:
|
||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
||||
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
||||
|
||||
artifacts:
|
||||
- path: dist\*
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
[metadata]
|
||||
description-file = README.md
|
4
setup.py
4
setup.py
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
from setuptools import setup
|
||||
|
||||
VERSION = "0.4.1"
|
||||
VERSION = "0.5.1"
|
||||
|
||||
INSTALL_REQUIRES = (
|
||||
"Pillow",
|
||||
|
@ -11,7 +11,7 @@ INSTALL_REQUIRES = (
|
|||
setup(
|
||||
name="img2pdf",
|
||||
version=VERSION,
|
||||
author="Johannes 'josch' Schauer",
|
||||
author="Johannes Schauer Marin Rodrigues",
|
||||
author_email="josch@mister-muffin.de",
|
||||
description="Convert images to PDF via direct JPEG inclusion.",
|
||||
long_description=open("README.md").read(),
|
||||
|
|
895
src/img2pdf.py
895
src/img2pdf.py
File diff suppressed because it is too large
Load diff
1707
src/img2pdf_test.py
1707
src/img2pdf_test.py
File diff suppressed because it is too large
Load diff
57
src/jp2.py
57
src/jp2.py
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
|
||||
# Copyright (C) 2013 Johannes Schauer Marin Rodrigues <j.schauer at email.de>
|
||||
#
|
||||
# this module is heavily based upon jpylyzer which is
|
||||
# KB / National Library of the Netherlands, Open Planets Foundation
|
||||
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
|||
|
||||
|
||||
def parse_ihdr(data):
|
||||
height = struct.unpack(">I", data[0:4])[0]
|
||||
width = struct.unpack(">I", data[4:8])[0]
|
||||
return width, height
|
||||
height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
|
||||
return width, height, channels, bpp + 1
|
||||
|
||||
|
||||
def parse_colr(data):
|
||||
|
@ -59,8 +58,8 @@ def parse_colr(data):
|
|||
|
||||
def parse_resc(data):
|
||||
hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
|
||||
hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
|
||||
vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
|
||||
hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
|
||||
vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
|
||||
return hdpi, vdpi
|
||||
|
||||
|
||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"ihdr":
|
||||
width, height = parse_ihdr(boxContents)
|
||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||
elif boxType == b"colr":
|
||||
colorspace = parse_colr(boxContents)
|
||||
elif boxType == b"res ":
|
||||
hdpi, vdpi = parse_res(boxContents)
|
||||
byteStart = byteEnd
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsejp2(data):
|
||||
|
@ -102,7 +101,9 @@ def parsejp2(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"jp2h":
|
||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||
boxContents
|
||||
)
|
||||
break
|
||||
byteStart = byteEnd
|
||||
if not width:
|
||||
|
@ -112,13 +113,41 @@ def parsejp2(data):
|
|||
if not colorspace:
|
||||
raise Exception("no colorspace in jp2 header")
|
||||
# retrieving the dpi is optional so we do not error out if not present
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsej2k(data):
|
||||
lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
|
||||
">HHIIIIIIIIH", data[4:42]
|
||||
)
|
||||
ssiz = [None] * csiz
|
||||
xrsiz = [None] * csiz
|
||||
yrsiz = [None] * csiz
|
||||
for i in range(csiz):
|
||||
ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack(
|
||||
"BBB", data[42 + 3 * i : 42 + 3 * (i + 1)]
|
||||
)
|
||||
assert ssiz == [7, 7, 7]
|
||||
return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||
|
||||
|
||||
def parse(data):
|
||||
if data[:4] == b"\xff\x4f\xff\x51":
|
||||
return parsej2k(data)
|
||||
else:
|
||||
return parsejp2(data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
width, height, colorspace = parsejp2(open(sys.argv[1]).read())
|
||||
sys.stdout.write("width = %d" % width)
|
||||
sys.stdout.write("height = %d" % height)
|
||||
sys.stdout.write("colorspace = %s" % colorspace)
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse(
|
||||
open(sys.argv[1], "rb").read()
|
||||
)
|
||||
print("width = %d" % width)
|
||||
print("height = %d" % height)
|
||||
print("colorspace = %s" % colorspace)
|
||||
print("hdpi = %s" % hdpi)
|
||||
print("vdpi = %s" % vdpi)
|
||||
print("channels = %s" % channels)
|
||||
print("bpp = %s" % bpp)
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
Binary file not shown.
2
tox.ini
2
tox.ini
|
@ -4,7 +4,7 @@
|
|||
# and then run "tox" from this directory.
|
||||
|
||||
[tox]
|
||||
envlist = py35, py36, py37, py38, py39
|
||||
envlist = py37, py38, py39, py310
|
||||
skip_missing_interpreters = true
|
||||
|
||||
[testenv]
|
||||
|
|
Loading…
Reference in a new issue