Compare commits
148 commits
Author | SHA1 | Date | |
---|---|---|---|
|
b91007fef8 | ||
a8cb28ba31 | |||
c6d12d6239 | |||
59132f20f8 | |||
3ba7d17e15 | |||
43c16ac369 | |||
08c4d9beec | |||
9e6eba9f40 | |||
5aeb628506 | |||
b6dbfdb481 | |||
23436114f8 | |||
2d5e4e3cb7 | |||
5e515abb6f | |||
a2e2998fb1 | |||
14948e7ba8 | |||
bcfdf8b54e | |||
9f74740c95 | |||
cbc3d50c63 | |||
4b549592bf | |||
5540365cfd | |||
819b366bf5 | |||
cc8c708295 | |||
fb9537d8b7 | |||
7678435eb7 | |||
ba7a360866 | |||
7f0bf47ff3 | |||
|
5cd0918d50 | ||
|
f157ced05d | ||
09064e8e70 | |||
2f736d7891 | |||
e05580a49a | |||
acc25a4926 | |||
f597887088 | |||
3e832fbcc2 | |||
1e8557cef1 | |||
29921eeabd | |||
33139612f8 | |||
64d27f4a8b | |||
85cbe1d128 | |||
b25429a4c1 | |||
c703e9df06 | |||
79e9985f35 | |||
cb2644c34f | |||
81502f21af | |||
0cbcb8fa12 | |||
e9e04b6dd9 | |||
fc059ee471 | |||
25466113e9 | |||
7405635b72 | |||
aea472101b | |||
7fa67bb337 | |||
7d40569aa1 | |||
83f9c32328 | |||
be8369373f | |||
10c6901fa3 | |||
57d7e07e6b | |||
272fe0433f | |||
ef7b9e739d | |||
af6fe27d53 | |||
bad6fcae39 | |||
d9b90499f3 | |||
edb0d29a14 | |||
bb3e8b0098 | |||
f454ebc6a6 | |||
c3db273e23 | |||
87afabd3cf | |||
|
5045282cc2 | ||
fb4b96452a | |||
c553e169a4 | |||
d9345ac767 | |||
1d52530229 | |||
3b117e674b | |||
e8ca53738f | |||
7c48bfb868 | |||
244f034a2e | |||
3da370d3bd | |||
6cff2931e4 | |||
6a55258804 | |||
3cdeab08ab | |||
cea7c9120b | |||
9eacfdaa76 | |||
95a313f437 | |||
30d705f020 | |||
dc926b2cf2 | |||
a8fdbd0038 | |||
6ff175d637 | |||
0732dff0be | |||
50b7145f64 | |||
e522ec14d9 | |||
9c9e5ece19 | |||
354fd7c264 | |||
392d4a665e | |||
09ad147d97 | |||
80393b6efa | |||
e265738ac2 | |||
1ffb160453 | |||
cde7472d15 | |||
6eec05c11c | |||
|
f483638b17 | ||
|
7f216a8848 | ||
|
2476215f39 | ||
|
f62858c245 | ||
|
a5e4da5755 | ||
|
64db7909ec | ||
|
af5ae5b9b6 | ||
d03f331521 | |||
635b08c321 | |||
152f6fb629 | |||
1f3b456ac9 | |||
4c5b72dab0 | |||
853a1ec363 | |||
55d589a548 | |||
5c617965f5 | |||
0067edf965 | |||
91e3a94c3d | |||
3d7e0e6812 | |||
b4c8aa1a5f | |||
114d7270a2 | |||
80d24a1d49 | |||
ea2245757f | |||
9cda595cd5 | |||
2eabebb513 | |||
02c85a50ad | |||
c97ce34023 | |||
81325d3998 | |||
8d2ae0f64e | |||
d29c596fe7 | |||
cd1088a5a9 | |||
2a8779295f | |||
6cd819d28f | |||
c48e1dbb1e | |||
d08d8c5be9 | |||
0e4f0047b2 | |||
0ce25d08c2 | |||
c5fd43e851 | |||
17fd73aed8 | |||
454d4e7775 | |||
cb2243fd10 | |||
129bd15b43 | |||
b8bfa98218 | |||
b5f0912e13 | |||
213a6af41f | |||
9290cb4a10 | |||
|
505344f83e | ||
|
32b4ed1f43 | ||
b2c3b641db | |||
c4fb1d886f | |||
11907242a5 |
18 changed files with 3056 additions and 3122 deletions
3
.mailmap
Normal file
3
.mailmap
Normal file
|
@ -0,0 +1,3 @@
|
|||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
|
@ -13,14 +13,14 @@ matrix:
|
|||
- netpbm
|
||||
- ghostscript
|
||||
- mupdf-tools
|
||||
- icc-profiles-free
|
||||
- name: "python 3.8 Windows"
|
||||
- name: "python 3.9 Windows"
|
||||
os: windows
|
||||
language: shell # 'language: python' is an error on Travis CI Windows
|
||||
before_install: choco install python imagemagick
|
||||
env: PATH=/c/Python38:/c/Python38/Scripts:$PATH
|
||||
env: PATH=/c/Python39:/c/Python39/Scripts:$PATH
|
||||
- name: "python 3.7 MacOs"
|
||||
os: osx
|
||||
osx_image: xcode12.2 # pikepdf import fails with earlier versions
|
||||
language: shell # 'language: python' is an error on Travis CI macOS
|
||||
cache:
|
||||
directories:
|
||||
|
|
53
CHANGES.rst
53
CHANGES.rst
|
@ -2,6 +2,59 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
0.6.0 (2025-02-15)
|
||||
------------------
|
||||
|
||||
- Add support for JBIG2 (generic coding)
|
||||
- Add convert_to_docobject() broken out from convert()
|
||||
- Add pil_get_dpi() broken out from get_imgmetadata()
|
||||
|
||||
0.5.1 (2023-11-26)
|
||||
------------------
|
||||
|
||||
- no default ICC profile location for PDF/A-1b on Windows
|
||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||
|
||||
0.5.0 (2023-10-28)
|
||||
------------------
|
||||
|
||||
- support MIFF for 16 bit CMYK input
|
||||
- accept pathlib.Path objects as input
|
||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||
be used if you want them
|
||||
- support for pikepdf (>= 6.2.0)
|
||||
|
||||
0.4.4 (2022-04-07)
|
||||
------------------
|
||||
|
||||
- --viewer-page-layout support for twopageright and twopageleft
|
||||
- Add B and JB paper sizes
|
||||
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
||||
|
||||
0.4.3 (2021-10-24)
|
||||
------------------
|
||||
|
||||
- fix --viewer-initial-page (broken in last release)
|
||||
|
||||
0.4.2 (2021-10-11)
|
||||
------------------
|
||||
|
||||
- add --rotation
|
||||
- allow palette PNG images with ICC profile
|
||||
- sort globbing result on windows
|
||||
- convert 8-bit PNG alpha channels to /SMasks in PDF
|
||||
- remove pdfrw from tests
|
||||
|
||||
0.4.1 (2021-05-09)
|
||||
------------------
|
||||
|
||||
- support wildcards in paths on windows
|
||||
- support MPO images
|
||||
- fix page border computation
|
||||
- use "img2pdf" logger instead of "root" logger
|
||||
- add --from-file
|
||||
|
||||
0.4.0 (2020-08-07)
|
||||
------------------
|
||||
|
||||
|
|
55
HACKING
55
HACKING
|
@ -27,6 +27,57 @@ Making a new release
|
|||
|
||||
- Build and upload to pypi:
|
||||
|
||||
$ rm dist/*
|
||||
$ rm -rf dist/*
|
||||
$ python3 setup.py sdist
|
||||
$ twine upload --sign dist/*
|
||||
$ twine upload dist/*
|
||||
|
||||
- Push everything to git forge
|
||||
|
||||
$ git push
|
||||
|
||||
- Push to github
|
||||
|
||||
$ git push github
|
||||
|
||||
- Obtain img2pdf.exe from appveyor:
|
||||
|
||||
https://ci.appveyor.com/project/josch/img2pdf/
|
||||
|
||||
- Create new release:
|
||||
|
||||
https://gitlab.mister-muffin.de/josch/img2pdf/releases/new
|
||||
|
||||
Using debbisect to find regressions
|
||||
-----------------------------------
|
||||
|
||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||
--verbose 2023-09-16 2023-10-24 \
|
||||
'chroot "$1" sh -c "
|
||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||
&& cd img2pdf
|
||||
&& pytest src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||
|
||||
Using debbisect cache
|
||||
---------------------
|
||||
|
||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--chrooted-customize-hook=bash
|
||||
unstable /dev/null
|
||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||
|
||||
Bisecting imagemagick
|
||||
---------------------
|
||||
|
||||
$ git clean -fdx && git reset --hard
|
||||
$ ./configure --prefix=$(pwd)/prefix
|
||||
$ make -j$(nproc)
|
||||
$ make install
|
||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||
|
|
123
README.md
123
README.md
|
@ -1,5 +1,5 @@
|
|||
[](https://travis-ci.org/josch/img2pdf)
|
||||
[](https://ci.appveyor.com/project/josch/img2pdf/branch/master)
|
||||
[](https://app.travis-ci.com/josch/img2pdf)
|
||||
[](https://ci.appveyor.com/project/josch/img2pdf/branch/main)
|
||||
|
||||
img2pdf
|
||||
=======
|
||||
|
@ -27,18 +27,20 @@ software, because the raw pixel data never has to be loaded into memory.
|
|||
The following table shows how img2pdf handles different input depending on the
|
||||
input file format and image color space.
|
||||
|
||||
| Format | Colorspace | Result |
|
||||
| -------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
| Format | Colorspace | Result |
|
||||
| ------------------------------------- | ------------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced, no transparency) | any | direct |
|
||||
| TIFF (CCITT Group 4) | 1-bit monochrome | direct |
|
||||
| JBIG2 (single-page generic coding) | 1-bit monochrome | direct |
|
||||
| any | any except CMYK and 1-bit monochrome | PNG Paeth |
|
||||
| any | 1-bit monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
|
||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||
For JPEG, JPEG2000, non-interlaced PNG, TIFF images with CCITT Group 4
|
||||
encoded data, and JBIG2 with single-page generic coding (e.g. using `jbig2enc`),
|
||||
img2pdf directly embeds the image data into the PDF without
|
||||
re-encoding it. It thus treats the PDF format merely as a container format for
|
||||
the image data. In these cases, img2pdf only increases the filesize by the size
|
||||
of the PDF container (typically around 500 to 700 bytes). Since data is only
|
||||
|
@ -47,7 +49,7 @@ solutions for these input formats.
|
|||
|
||||
For all other input types, img2pdf first has to transform the pixel data to
|
||||
make it compatible with PDF. In most cases, the PNG Paeth filter is applied to
|
||||
the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for
|
||||
the pixel data. For 1-bit monochrome input, CCITT Group 4 is used instead. Only for
|
||||
CMYK input no filter is applied before finally applying flate compression.
|
||||
|
||||
Usage
|
||||
|
@ -65,6 +67,12 @@ The detailed documentation can be accessed by running:
|
|||
|
||||
$ img2pdf --help
|
||||
|
||||
With no command line arguments supplied, img2pdf will read a single image from
|
||||
standard input and write the resulting PDF to standard output. Here is an
|
||||
example for how to scan directly to PDF using scanimage(1) from SANE:
|
||||
|
||||
$ scanimage --mode=Color --resolution=300 | pnmtojpeg -quality 90 | img2pdf > scan.pdf
|
||||
|
||||
Bugs
|
||||
----
|
||||
|
||||
|
@ -72,15 +80,15 @@ Bugs
|
|||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||
please contact me.
|
||||
|
||||
- I have not yet figured out how to determine the colorspace of JPEG2000
|
||||
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
||||
files with other colorspaces, you must explicitly specify it using the
|
||||
`--colorspace` option.
|
||||
|
||||
- Input images with alpha channels are not allowed. PDF only supports
|
||||
transparency using binary masks but is unable to store 8-bit transparency
|
||||
information as part of the image itself. But img2pdf will always be lossless
|
||||
and thus, input images must not carry transparency information.
|
||||
- An error is produced if the input image is broken. This commonly happens if
|
||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||
only eight different values from 1 to 8 are permitted, Android phones and
|
||||
Canon DSLR cameras produce JPEG images with the invalid value of zero.
|
||||
Either fix your input images with `exiftool` or similar software before
|
||||
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`
|
||||
(if you run img2pdf from the commandline) or by passing
|
||||
`rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using
|
||||
img2pdf as a library.
|
||||
|
||||
- img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
|
||||
input if necessary. To prevent decompression bomb denial of service attacks,
|
||||
|
@ -117,10 +125,9 @@ You can then test the converter using:
|
|||
|
||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||
|
||||
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
||||
appveyor. If you don't want to install Python before using img2pdf you can head
|
||||
to appveyor and click on "Artifacts" to download the latest version:
|
||||
https://ci.appveyor.com/project/josch/img2pdf
|
||||
If you don't want to set up Python on Windows, then head to the
|
||||
[releases](https://gitlab.mister-muffin.de/josch/img2pdf/releases) section and download the latest
|
||||
`img2pdf.exe`.
|
||||
|
||||
GUI
|
||||
---
|
||||
|
@ -147,6 +154,10 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
f1.write(img2pdf.convert(f2))
|
||||
|
||||
# opening using pathlib
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
||||
|
||||
# using in-memory image data
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert("\x89PNG..."))
|
||||
|
@ -161,26 +172,26 @@ The package can also be used as a library:
|
|||
|
||||
# convert all files ending in .jpg inside a directory
|
||||
dirname = "/path/to/images"
|
||||
imgs = []
|
||||
for fname in os.listdir(dirname):
|
||||
if not fname.endswith(".jpg"):
|
||||
continue
|
||||
path = os.path.join(dirname, fname)
|
||||
if os.path.isdir(path):
|
||||
continue
|
||||
imgs.append(path)
|
||||
with open("name.pdf","wb") as f:
|
||||
imgs = []
|
||||
for fname in os.listdir(dirname):
|
||||
if not fname.endswith(".jpg"):
|
||||
continue
|
||||
path = os.path.join(dirname, fname)
|
||||
if os.path.isdir(path):
|
||||
continue
|
||||
imgs.append(path)
|
||||
f.write(img2pdf.convert(imgs))
|
||||
|
||||
# convert all files ending in .jpg in a directory and its subdirectories
|
||||
dirname = "/path/to/images"
|
||||
imgs = []
|
||||
for r, _, f in os.walk(dirname):
|
||||
for fname in f:
|
||||
if not fname.endswith(".jpg"):
|
||||
continue
|
||||
imgs.append(os.path.join(r, fname))
|
||||
with open("name.pdf","wb") as f:
|
||||
imgs = []
|
||||
for r, _, f in os.walk(dirname):
|
||||
for fname in f:
|
||||
if not fname.endswith(".jpg"):
|
||||
continue
|
||||
imgs.append(os.path.join(r, fname))
|
||||
f.write(img2pdf.convert(imgs))
|
||||
|
||||
|
||||
|
@ -189,6 +200,15 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||
|
||||
# convert all files matching a glob using pathlib.Path
|
||||
from pathlib import Path
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
||||
|
||||
# ignore invalid rotation values in the input images
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid))
|
||||
|
||||
# writing to file descriptor
|
||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
img2pdf.convert(f2, outputstream=f1)
|
||||
|
@ -199,6 +219,16 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun))
|
||||
|
||||
# use a fixed dpi of 300 instead of reading it from the image
|
||||
dpix = dpiy = 300
|
||||
layout_fun = img2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun))
|
||||
|
||||
# create a PDF/A-1b compliant document by passing an ICC profile
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', pdfa="/usr/share/color/icc/sRGB.icc"))
|
||||
|
||||
Comparison to ImageMagick
|
||||
-------------------------
|
||||
|
||||
|
@ -286,3 +316,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
|||
input to RGB and removes the alpha channel from images with transparency. For
|
||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||
|
||||
Comparison to econvert from ExactImage
|
||||
--------------------------------------
|
||||
|
||||
Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
|
||||
directly without re-encoding but when given other file formats, it stores them
|
||||
just using flate compression, which unnecessarily increases the filesize.
|
||||
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
||||
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
||||
to feed it 16-bit files, it errors out with "Unhandled bps/spp combination". It
|
||||
also seems to choose JPEG encoding when using it on some file types (like
|
||||
palette images) making it again not lossless for that input as well.
|
||||
|
|
10
appveyor.yml
10
appveyor.yml
|
@ -16,16 +16,18 @@ environment:
|
|||
- PYTHON: "C:\\Python37-x64"
|
||||
|
||||
install:
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller"
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow"
|
||||
|
||||
build: off
|
||||
|
||||
test_script:
|
||||
- "%PYTHON%\\python.exe -m tox"
|
||||
# don't run tests on windows because we don't have imagemagick
|
||||
#test_script:
|
||||
# - "%PYTHON%\\python.exe -m tox"
|
||||
|
||||
after_test:
|
||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
||||
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
||||
|
||||
artifacts:
|
||||
- path: dist\*
|
||||
|
|
353
magick.py
353
magick.py
|
@ -1,353 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import numpy
|
||||
import scipy.signal
|
||||
import zlib
|
||||
import struct
|
||||
|
||||
|
||||
def find_closest_palette_color(color, palette):
|
||||
if color.ndim == 0:
|
||||
idx = (numpy.abs(palette - color)).argmin()
|
||||
else:
|
||||
# naive distance function by computing the euclidean distance in RGB space
|
||||
idx = ((palette - color) ** 2).sum(axis=-1).argmin()
|
||||
return palette[idx]
|
||||
|
||||
|
||||
def floyd_steinberg(img, palette):
|
||||
result = numpy.array(img, copy=True)
|
||||
for y in range(result.shape[0]):
|
||||
for x in range(result.shape[1]):
|
||||
oldpixel = result[y, x]
|
||||
newpixel = find_closest_palette_color(oldpixel, palette)
|
||||
quant_error = oldpixel - newpixel
|
||||
result[y, x] = newpixel
|
||||
if x + 1 < result.shape[1]:
|
||||
result[y, x + 1] += quant_error * 7 / 16
|
||||
if y + 1 < result.shape[0]:
|
||||
result[y + 1, x - 1] += quant_error * 3 / 16
|
||||
result[y + 1, x] += quant_error * 5 / 16
|
||||
if x + 1 < result.shape[1] and y + 1 < result.shape[0]:
|
||||
result[y + 1, x + 1] += quant_error * 1 / 16
|
||||
return result
|
||||
|
||||
|
||||
def convolve_rgba(img, kernel):
|
||||
return numpy.stack(
|
||||
(
|
||||
scipy.signal.convolve2d(img[:, :, 0], kernel, "same"),
|
||||
scipy.signal.convolve2d(img[:, :, 1], kernel, "same"),
|
||||
scipy.signal.convolve2d(img[:, :, 2], kernel, "same"),
|
||||
scipy.signal.convolve2d(img[:, :, 3], kernel, "same"),
|
||||
),
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
|
||||
def rgb2gray(img):
|
||||
result = numpy.zeros((60, 60), dtype=numpy.dtype("int64"))
|
||||
for y in range(img.shape[0]):
|
||||
for x in range(img.shape[1]):
|
||||
clin = sum(img[y, x] * [0.2126, 0.7152, 0.0722]) / 0xFFFF
|
||||
if clin <= 0.0031308:
|
||||
csrgb = 12.92 * clin
|
||||
else:
|
||||
csrgb = 1.055 * clin ** (1 / 2.4) - 0.055
|
||||
result[y, x] = csrgb * 0xFFFF
|
||||
return result
|
||||
|
||||
|
||||
def palettize(img, pal):
|
||||
result = numpy.zeros((img.shape[0], img.shape[1]), dtype=numpy.dtype("int64"))
|
||||
for y in range(img.shape[0]):
|
||||
for x in range(img.shape[1]):
|
||||
for i, col in enumerate(pal):
|
||||
if numpy.array_equal(img[y, x], col):
|
||||
result[y, x] = i
|
||||
break
|
||||
else:
|
||||
raise Exception()
|
||||
return result
|
||||
|
||||
|
||||
# we cannot use zlib.compress() because different compressors may compress the
|
||||
# same data differently, for example by using different optimizations on
|
||||
# different architectures:
|
||||
# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/
|
||||
#
|
||||
# to make the compressed representation of the uncompressed data bit-by-bit
|
||||
# identical on all platforms we make use of the compression method 0, that is,
|
||||
# no compression at all :)
|
||||
def compress(data):
|
||||
# two-byte zlib header (rfc1950)
|
||||
# common header for lowest compression level
|
||||
# bits 0-3: Compression info, base-2 logarithm of the LZ77 window size,
|
||||
# minus eight -- 7 indicates a 32K window size
|
||||
# bits 4-7: Compression method -- 8 is deflate
|
||||
# bits 8-9: Compression level -- 0 is fastest
|
||||
# bit 10: preset dictionary -- 0 is none
|
||||
# bits 11-15: check bits so that the 16-bit unsigned integer stored in MSB
|
||||
# order is a multiple of 31
|
||||
result = b"\x78\x01"
|
||||
# content is stored in deflate format (rfc1951)
|
||||
# maximum chunk size is the largest 16 bit unsigned integer
|
||||
chunksize = 0xFFFF
|
||||
for i in range(0, len(data), chunksize):
|
||||
# bits 0-4 are unused
|
||||
# bits 5-6 indicate compression method -- 0 is no compression
|
||||
# bit 7 indicates the last chunk
|
||||
if i * chunksize < len(data) - chunksize:
|
||||
result += b"\x00"
|
||||
else:
|
||||
# last chunk
|
||||
result += b"\x01"
|
||||
chunk = data[i : i + chunksize]
|
||||
# the chunk length as little endian 16 bit unsigned integer
|
||||
result += struct.pack("<H", len(chunk))
|
||||
# the one's complement of the chunk length
|
||||
# one's complement is all bits inverted which is the result of
|
||||
# xor with 0xffff for a 16 bit unsigned integer
|
||||
result += struct.pack("<H", len(chunk) ^ 0xFFFF)
|
||||
result += chunk
|
||||
# adler32 checksum of the uncompressed data as big endian 32 bit unsigned
|
||||
# integer
|
||||
result += struct.pack(">I", zlib.adler32(data))
|
||||
return result
|
||||
|
||||
|
||||
def write_png(data, path, bitdepth, colortype, palette=None):
|
||||
with open(path, "wb") as f:
|
||||
f.write(b"\x89PNG\r\n\x1A\n")
|
||||
# PNG image type Colour type Allowed bit depths
|
||||
# Greyscale 0 1, 2, 4, 8, 16
|
||||
# Truecolour 2 8, 16
|
||||
# Indexed-colour 3 1, 2, 4, 8
|
||||
# Greyscale with alpha 4 8, 16
|
||||
# Truecolour with alpha 6 8, 16
|
||||
block = b"IHDR" + struct.pack(
|
||||
">IIBBBBB",
|
||||
data.shape[1], # width
|
||||
data.shape[0], # height
|
||||
bitdepth, # bitdepth
|
||||
colortype, # colortype
|
||||
0, # compression
|
||||
0, # filtertype
|
||||
0, # interlaced
|
||||
)
|
||||
f.write(
|
||||
struct.pack(">I", len(block) - 4)
|
||||
+ block
|
||||
+ struct.pack(">I", zlib.crc32(block))
|
||||
)
|
||||
if palette is not None:
|
||||
block = b"PLTE"
|
||||
for col in palette:
|
||||
block += struct.pack(">BBB", col[0], col[1], col[2])
|
||||
f.write(
|
||||
struct.pack(">I", len(block) - 4)
|
||||
+ block
|
||||
+ struct.pack(">I", zlib.crc32(block))
|
||||
)
|
||||
raw = b""
|
||||
for y in range(data.shape[0]):
|
||||
raw += b"\0"
|
||||
if bitdepth == 16:
|
||||
raw += data[y].astype(">u2").tobytes()
|
||||
elif bitdepth == 8:
|
||||
raw += data[y].astype(">u1").tobytes()
|
||||
elif bitdepth in [4, 2, 1]:
|
||||
valsperbyte = 8 // bitdepth
|
||||
for x in range(0, data.shape[1], valsperbyte):
|
||||
val = 0
|
||||
for j in range(valsperbyte):
|
||||
if x + j >= data.shape[1]:
|
||||
break
|
||||
val |= (data[y, x + j].astype(">u2") & (2 ** bitdepth - 1)) << (
|
||||
(valsperbyte - j - 1) * bitdepth
|
||||
)
|
||||
raw += struct.pack(">B", val)
|
||||
else:
|
||||
raise Exception()
|
||||
compressed = compress(raw)
|
||||
block = b"IDAT" + compressed
|
||||
f.write(
|
||||
struct.pack(">I", len(compressed))
|
||||
+ block
|
||||
+ struct.pack(">I", zlib.crc32(block))
|
||||
)
|
||||
block = b"IEND"
|
||||
f.write(struct.pack(">I", 0) + block + struct.pack(">I", zlib.crc32(block)))
|
||||
|
||||
|
||||
def main():
|
||||
outdir = sys.argv[1]
|
||||
|
||||
# create a 256 color palette by first writing 16 shades of gray
|
||||
# and then writing an array of RGB colors with 6, 8 and 5 levels
|
||||
# for red, green and blue, respectively
|
||||
pal8 = numpy.zeros((256, 3), dtype=numpy.dtype("int64"))
|
||||
i = 0
|
||||
for gray in range(15, 255, 15):
|
||||
pal8[i] = [gray, gray, gray]
|
||||
i += 1
|
||||
for red in 0, 0x33, 0x66, 0x99, 0xCC, 0xFF:
|
||||
for green in 0, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF:
|
||||
for blue in 0, 0x40, 0x80, 0xBF, 0xFF:
|
||||
pal8[i] = [red, green, blue]
|
||||
i += 1
|
||||
assert i == 256
|
||||
|
||||
# windows 16 color palette
|
||||
pal4 = numpy.array(
|
||||
[
|
||||
[0x00, 0x00, 0x00],
|
||||
[0x80, 0x00, 0x00],
|
||||
[0x00, 0x80, 0x00],
|
||||
[0x80, 0x80, 0x00],
|
||||
[0x00, 0x00, 0x80],
|
||||
[0x80, 0x00, 0x80],
|
||||
[0x00, 0x80, 0x80],
|
||||
[0xC0, 0xC0, 0xC0],
|
||||
[0x80, 0x80, 0x80],
|
||||
[0xFF, 0x00, 0x00],
|
||||
[0x00, 0xFF, 0x00],
|
||||
[0xFF, 0x00, 0x00],
|
||||
[0x00, 0xFF, 0x00],
|
||||
[0xFF, 0x00, 0xFF],
|
||||
[0x00, 0xFF, 0x00],
|
||||
[0xFF, 0xFF, 0xFF],
|
||||
],
|
||||
dtype=numpy.dtype("int64"),
|
||||
)
|
||||
|
||||
# choose values slightly off red, lime and blue because otherwise
|
||||
# imagemagick will classify the image as Depth: 8/1-bit
|
||||
pal2 = numpy.array(
|
||||
[[0, 0, 0], [0xFE, 0, 0], [0, 0xFE, 0], [0, 0, 0xFE]],
|
||||
dtype=numpy.dtype("int64"),
|
||||
)
|
||||
|
||||
# don't choose black and white or otherwise imagemagick will classify the
|
||||
# image as bilevel with 8/1-bit depth instead of palette with 8-bit color
|
||||
# don't choose gray colors or otherwise imagemagick will classify the
|
||||
# image as grayscale
|
||||
pal1 = numpy.array(
|
||||
[[0x01, 0x02, 0x03], [0xFE, 0xFD, 0xFC]], dtype=numpy.dtype("int64")
|
||||
)
|
||||
|
||||
# gaussian kernel with sigma=3
|
||||
kernel = numpy.array(
|
||||
[
|
||||
[0.011362, 0.014962, 0.017649, 0.018648, 0.017649, 0.014962, 0.011362],
|
||||
[0.014962, 0.019703, 0.02324, 0.024556, 0.02324, 0.019703, 0.014962],
|
||||
[0.017649, 0.02324, 0.027413, 0.028964, 0.027413, 0.02324, 0.017649],
|
||||
[0.018648, 0.024556, 0.028964, 0.030603, 0.028964, 0.024556, 0.018648],
|
||||
[0.017649, 0.02324, 0.027413, 0.028964, 0.027413, 0.02324, 0.017649],
|
||||
[0.014962, 0.019703, 0.02324, 0.024556, 0.02324, 0.019703, 0.014962],
|
||||
[0.011362, 0.014962, 0.017649, 0.018648, 0.017649, 0.014962, 0.011362],
|
||||
],
|
||||
numpy.float,
|
||||
)
|
||||
|
||||
# constructs a 2D array of a circle with a width of 36
|
||||
circle = list()
|
||||
offsets_36 = [14, 11, 9, 7, 6, 5, 4, 3, 3, 2, 2, 1, 1, 1, 0, 0, 0, 0]
|
||||
for offs in offsets_36 + offsets_36[::-1]:
|
||||
circle.append([0] * offs + [1] * (len(offsets_36) - offs) * 2 + [0] * offs)
|
||||
|
||||
alpha = numpy.zeros((60, 60, 4), dtype=numpy.dtype("int64"))
|
||||
|
||||
# draw three circles
|
||||
for (xpos, ypos, color) in [
|
||||
(12, 3, [0xFFFF, 0, 0, 0xFFFF]),
|
||||
(21, 21, [0, 0xFFFF, 0, 0xFFFF]),
|
||||
(3, 21, [0, 0, 0xFFFF, 0xFFFF]),
|
||||
]:
|
||||
for x, row in enumerate(circle):
|
||||
for y, pos in enumerate(row):
|
||||
if pos:
|
||||
alpha[y + ypos, x + xpos] += color
|
||||
alpha = numpy.clip(alpha, 0, 0xFFFF)
|
||||
alpha = convolve_rgba(alpha, kernel)
|
||||
|
||||
write_png(alpha, outdir + "/alpha.png", 16, 6)
|
||||
|
||||
normal16 = alpha[:, :, 0:3]
|
||||
write_png(normal16, outdir + "/normal16.png", 16, 2)
|
||||
|
||||
write_png(normal16 / 0xFFFF * 0xFF, outdir + "/normal.png", 8, 2)
|
||||
|
||||
write_png(0xFF - normal16 / 0xFFFF * 0xFF, outdir + "/inverse.png", 8, 2)
|
||||
|
||||
gray16 = rgb2gray(normal16)
|
||||
|
||||
write_png(gray16, outdir + "/gray16.png", 16, 0)
|
||||
|
||||
write_png(gray16 / 0xFFFF * 0xFF, outdir + "/gray8.png", 8, 0)
|
||||
|
||||
write_png(
|
||||
floyd_steinberg(gray16, numpy.arange(16) / 0xF * 0xFFFF) / 0xFFFF * 0xF,
|
||||
outdir + "/gray4.png",
|
||||
4,
|
||||
0,
|
||||
)
|
||||
|
||||
write_png(
|
||||
floyd_steinberg(gray16, numpy.arange(4) / 0x3 * 0xFFFF) / 0xFFFF * 0x3,
|
||||
outdir + "/gray2.png",
|
||||
2,
|
||||
0,
|
||||
)
|
||||
|
||||
write_png(
|
||||
floyd_steinberg(gray16, numpy.arange(2) / 0x1 * 0xFFFF) / 0xFFFF * 0x1,
|
||||
outdir + "/gray1.png",
|
||||
1,
|
||||
0,
|
||||
)
|
||||
|
||||
write_png(
|
||||
palettize(
|
||||
floyd_steinberg(normal16, pal8 * 0xFFFF / 0xFF) / 0xFFFF * 0xFF, pal8
|
||||
),
|
||||
outdir + "/palette8.png",
|
||||
8,
|
||||
3,
|
||||
pal8,
|
||||
)
|
||||
|
||||
write_png(
|
||||
palettize(
|
||||
floyd_steinberg(normal16, pal4 * 0xFFFF / 0xFF) / 0xFFFF * 0xFF, pal4
|
||||
),
|
||||
outdir + "/palette4.png",
|
||||
4,
|
||||
3,
|
||||
pal4,
|
||||
)
|
||||
|
||||
write_png(
|
||||
palettize(
|
||||
floyd_steinberg(normal16, pal2 * 0xFFFF / 0xFF) / 0xFFFF * 0xFF, pal2
|
||||
),
|
||||
outdir + "/palette2.png",
|
||||
2,
|
||||
3,
|
||||
pal2,
|
||||
)
|
||||
|
||||
write_png(
|
||||
palettize(
|
||||
floyd_steinberg(normal16, pal1 * 0xFFFF / 0xFF) / 0xFFFF * 0xFF, pal1
|
||||
),
|
||||
outdir + "/palette1.png",
|
||||
1,
|
||||
3,
|
||||
pal1,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,2 +0,0 @@
|
|||
[metadata]
|
||||
description-file = README.md
|
8
setup.py
8
setup.py
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
from setuptools import setup
|
||||
|
||||
VERSION = "0.4.0"
|
||||
VERSION = "0.6.0"
|
||||
|
||||
INSTALL_REQUIRES = (
|
||||
"Pillow",
|
||||
|
@ -11,7 +11,7 @@ INSTALL_REQUIRES = (
|
|||
setup(
|
||||
name="img2pdf",
|
||||
version=VERSION,
|
||||
author="Johannes 'josch' Schauer",
|
||||
author="Johannes Schauer Marin Rodrigues",
|
||||
author_email="josch@mister-muffin.de",
|
||||
description="Convert images to PDF via direct JPEG inclusion.",
|
||||
long_description=open("README.md").read(),
|
||||
|
@ -40,7 +40,9 @@ setup(
|
|||
include_package_data=True,
|
||||
zip_safe=True,
|
||||
install_requires=INSTALL_REQUIRES,
|
||||
extras_require={"gui": ("tkinter"),},
|
||||
extras_require={
|
||||
"gui": ("tkinter"),
|
||||
},
|
||||
entry_points={
|
||||
"setuptools.installation": ["eggsecutable = img2pdf:main"],
|
||||
"console_scripts": ["img2pdf = img2pdf:main"],
|
||||
|
|
1433
src/img2pdf.py
1433
src/img2pdf.py
File diff suppressed because it is too large
Load diff
2440
src/img2pdf_test.py
Normal file → Executable file
2440
src/img2pdf_test.py
Normal file → Executable file
File diff suppressed because it is too large
Load diff
57
src/jp2.py
57
src/jp2.py
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
|
||||
# Copyright (C) 2013 Johannes Schauer Marin Rodrigues <j.schauer at email.de>
|
||||
#
|
||||
# this module is heavily based upon jpylyzer which is
|
||||
# KB / National Library of the Netherlands, Open Planets Foundation
|
||||
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
|||
|
||||
|
||||
def parse_ihdr(data):
    """Decode the leading fields of a JP2 image header ("ihdr") box.

    Only the first eleven bytes of ``data`` are read: two big-endian
    unsigned 32-bit integers (height, then width), one unsigned 16-bit
    component count and one unsigned byte holding the bit depth.

    Returns a ``(width, height, channels, bpp)`` tuple, where ``bpp`` is
    the stored depth byte plus one.

    Raises ``struct.error`` if ``data`` holds fewer than eleven bytes.
    """
    # The box stores height before width; the tuple is returned width-first.
    height, width, channels, depth_byte = struct.unpack(">IIHB", data[:11])
    # NOTE(review): the depth byte is used as-is. If the format marks signed
    # samples or "depth varies per component" in this byte, the value reported
    # here will not be a real bit depth -- confirm against the JP2 spec.
    return width, height, channels, depth_byte + 1
|
||||
|
||||
|
||||
def parse_colr(data):
|
||||
|
@ -59,8 +58,8 @@ def parse_colr(data):
|
|||
|
||||
def parse_resc(data):
    """Convert a JP2 capture resolution ("resc") box payload to DPI.

    The ten-byte payload holds, in order, the vertical numerator and
    denominator, the horizontal numerator and denominator (unsigned
    16-bit big-endian each), then the vertical and horizontal exponents
    (signed 8-bit). Each resolution is ``num / den * 10**exp`` grid
    points per metre; it is converted to dots per inch here.

    Returns ``(hdpi, vdpi)``. Because the resolution is optional
    metadata, ``(None, None)`` is returned when a denominator is zero
    instead of raising ``ZeroDivisionError``.

    Raises ``struct.error`` if ``data`` is not exactly ten bytes long.
    """
    # Vertical values precede horizontal ones in the box, and the
    # exponents are two's-complement signed bytes (hence "b", not "B").
    vnum, vden, hnum, hden, vexp, hexp = struct.unpack(">HHHHbb", data)
    if hden == 0 or vden == 0:
        return None, None
    # One inch is 0.0254 m, so points-per-metre * 0.0254 = points-per-inch.
    metres_per_inch = 0.0254
    hdpi = (hnum / hden) * (10.0**hexp) * metres_per_inch
    vdpi = (vnum / vden) * (10.0**vexp) * metres_per_inch
    return hdpi, vdpi
|
||||
|
||||
|
||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"ihdr":
|
||||
width, height = parse_ihdr(boxContents)
|
||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||
elif boxType == b"colr":
|
||||
colorspace = parse_colr(boxContents)
|
||||
elif boxType == b"res ":
|
||||
hdpi, vdpi = parse_res(boxContents)
|
||||
byteStart = byteEnd
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsejp2(data):
|
||||
|
@ -102,7 +101,9 @@ def parsejp2(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"jp2h":
|
||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||
boxContents
|
||||
)
|
||||
break
|
||||
byteStart = byteEnd
|
||||
if not width:
|
||||
|
@ -112,13 +113,41 @@ def parsejp2(data):
|
|||
if not colorspace:
|
||||
raise Exception("no colorspace in jp2 header")
|
||||
# retrieving the dpi is optional so we do not error out if not present
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsej2k(data):
    """Read image geometry from the SIZ segment of a raw J2K codestream.

    ``data`` must start with the four signature bytes checked by
    ``parse``. The fixed part of the segment is read from offset 4
    (two 16-bit and eight 32-bit big-endian fields followed by the 16-bit
    component count); three bytes per component follow from offset 42,
    the first of which is that component's depth byte.

    Returns ``(width, height, None, None, None, channels, 8)``, the same
    seven-slot shape ``parsejp2`` returns; colorspace and both DPI values
    are not available from this segment and are reported as ``None``.

    Raises ``struct.error`` if ``data`` is too short, and
    ``AssertionError`` if the image is not exactly three components with
    a depth byte of 7 (the only layout this parser supports).
    """
    _, _, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack_from(
        ">HHIIIIIIIIH", data, 4
    )
    # Only the first byte of each 3-byte component record (the depth byte)
    # is needed; the two subsampling bytes are ignored.
    ssiz = [struct.unpack_from("BBB", data, 42 + 3 * i)[0] for i in range(csiz)]
    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; the exception type is unchanged for existing callers.
    if ssiz != [7, 7, 7]:
        raise AssertionError(
            "unsupported JPEG 2000 codestream: expected three components "
            "with depth byte 7, got %r" % (ssiz,)
        )
    # Visible size is the reference grid extent minus the image offset.
    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||
|
||||
|
||||
def parse(data):
    """Dispatch ``data`` to the matching JPEG 2000 parser.

    Input whose first four bytes are ``FF 4F FF 51`` is handed to
    ``parsej2k``; everything else is handed to ``parsejp2``. The chosen
    parser's return value is passed through unchanged.
    """
    is_raw_codestream = data[:4] == b"\xff\x4f\xff\x51"
    parser = parsej2k if is_raw_codestream else parsejp2
    return parser(data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line helper: print the header fields of the file named by the
    # first argument.
    import sys

    # Read in binary mode (the parsers compare against bytes signatures) and
    # close the file as soon as its contents are in memory.
    with open(sys.argv[1], "rb") as infile:
        width, height, colorspace, hdpi, vdpi, channels, bpp = parse(infile.read())

    print("width = %d" % width)
    print("height = %d" % height)
    print("colorspace = %s" % colorspace)
    print("hdpi = %s" % hdpi)
    print("vdpi = %s" % vdpi)
    print("channels = %s" % channels)
    print("bpp = %s" % bpp)
|
||||
|
|
Binary file not shown.
Before ![]() (image error) Size: 1.9 KiB After ![]() (image error) Size: 1.9 KiB ![]() ![]() |
BIN
src/tests/input/mono.jb2
Normal file
BIN
src/tests/input/mono.jb2
Normal file
Binary file not shown.
Binary file not shown.
BIN
src/tests/output/mono.jb2.pdf
Normal file
BIN
src/tests/output/mono.jb2.pdf
Normal file
Binary file not shown.
4
tox.ini
4
tox.ini
|
@ -4,7 +4,7 @@
|
|||
# and then run "tox" from this directory.
|
||||
|
||||
[tox]
|
||||
envlist = py35, py36, py37, py38
|
||||
envlist = py37, py38, py39, py310
|
||||
skip_missing_interpreters = true
|
||||
|
||||
[testenv]
|
||||
|
@ -15,4 +15,4 @@ deps =
|
|||
numpy
|
||||
scipy
|
||||
commands =
|
||||
python -m pytest
|
||||
python -m pytest -vv
|
||||
|
|
Loading…
Reference in a new issue