Compare commits
No commits in common. "main" and "main" have entirely different histories.
14 changed files with 874 additions and 2134 deletions
3
.mailmap
3
.mailmap
|
@ -1,3 +0,0 @@
|
|||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
|
@ -13,6 +13,7 @@ matrix:
|
|||
- netpbm
|
||||
- ghostscript
|
||||
- mupdf-tools
|
||||
- icc-profiles-free
|
||||
- name: "python 3.9 Windows"
|
||||
os: windows
|
||||
language: shell # 'language: python' is an error on Travis CI Windows
|
||||
|
|
39
CHANGES.rst
39
CHANGES.rst
|
@ -2,44 +2,7 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
0.5.1 (2023-11-26)
|
||||
------------------
|
||||
|
||||
- no default ICC profile location for PDF/A-1b on Windows
|
||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||
|
||||
0.5.0 (2023-10-28)
|
||||
------------------
|
||||
|
||||
- support MIFF for 16 bit CMYK input
|
||||
- accept pathlib.Path objects as input
|
||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||
be used if you want them
|
||||
- support for pikepdf (>= 6.2.0)
|
||||
|
||||
0.4.4 (2022-04-07)
|
||||
------------------
|
||||
|
||||
- --viewer-page-layout support for twopageright and twopageleft
|
||||
- Add B and JB paper sizes
|
||||
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
||||
|
||||
0.4.3 (2021-10-24)
|
||||
------------------
|
||||
|
||||
- fix --viewer-initial-page (broken in last release)
|
||||
|
||||
0.4.2 (2021-10-11)
|
||||
------------------
|
||||
|
||||
- add --rotation
|
||||
- allow palette PNG images with ICC profile
|
||||
- sort globbing result on windows
|
||||
- convert 8-bit PNG alpha channels to /SMasks in PDF
|
||||
- remove pdfrw from tests
|
||||
|
||||
0.4.1 (2021-05-09)
|
||||
0.4.1 (2020-05-09)
|
||||
------------------
|
||||
|
||||
- support wildcards in paths on windows
|
||||
|
|
39
HACKING
39
HACKING
|
@ -27,41 +27,6 @@ Making a new release
|
|||
|
||||
- Build and upload to pypi:
|
||||
|
||||
$ rm -rf dist/*
|
||||
$ rm dist/*
|
||||
$ python3 setup.py sdist
|
||||
$ twine upload dist/*
|
||||
|
||||
Using debbisect to find regressions
|
||||
-----------------------------------
|
||||
|
||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||
--verbose 2023-09-16 2023-10-24 \
|
||||
'chroot "$1" sh -c "
|
||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||
&& cd img2pdf
|
||||
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||
|
||||
Using debbisect cache
|
||||
---------------------
|
||||
|
||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--chrooted-customize-hook=bash
|
||||
unstable /dev/null
|
||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||
|
||||
Bisecting imagemagick
|
||||
---------------------
|
||||
|
||||
$ git clean -fdx && git reset --hard
|
||||
$ ./configure --prefix=$(pwd)/prefix
|
||||
$ make -j$(nproc)
|
||||
$ make install
|
||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||
$ twine upload --sign dist/*
|
||||
|
|
71
README.md
71
README.md
|
@ -1,5 +1,5 @@
|
|||
[![Travis Status](https://travis-ci.com/josch/img2pdf.svg?branch=main)](https://app.travis-ci.com/josch/img2pdf)
|
||||
[![Appveyor Status](https://ci.appveyor.com/api/projects/status/2kws3wkqvi526llj/branch/main?svg=true)](https://ci.appveyor.com/project/josch/img2pdf/branch/main)
|
||||
[![Travis Status](https://travis-ci.org/josch/img2pdf.svg?branch=master)](https://travis-ci.org/josch/img2pdf)
|
||||
[![Appveyor Status](https://ci.appveyor.com/api/projects/status/2kws3wkqvi526llj/branch/master?svg=true)](https://ci.appveyor.com/project/josch/img2pdf/branch/master)
|
||||
|
||||
img2pdf
|
||||
=======
|
||||
|
@ -27,15 +27,15 @@ software, because the raw pixel data never has to be loaded into memory.
|
|||
The following table shows how img2pdf handles different input depending on the
|
||||
input file format and image color space.
|
||||
|
||||
| Format | Colorspace | Result |
|
||||
| ------------------------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced, no transparency) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
| Format | Colorspace | Result |
|
||||
| -------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
|
||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||
|
@ -72,15 +72,15 @@ Bugs
|
|||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||
please contact me.
|
||||
|
||||
- An error is produced if the input image is broken. This commonly happens if
|
||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||
only nine different values from 1 to 9 are permitted, Android phones and
|
||||
Canon DSLR cameras produce JPEG images with the invalid value of zero.
|
||||
Either fix your input images with `exiftool` or similar software before
|
||||
passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`
|
||||
(if you run img2pdf from the commandline) or by passing
|
||||
`rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using
|
||||
img2pdf as a library.
|
||||
- I have not yet figured out how to determine the colorspace of JPEG2000
|
||||
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
||||
files with other colorspaces, you must explicitly specify it using the
|
||||
`--colorspace` option.
|
||||
|
||||
- Input images with alpha channels are not allowed. PDF only supports
|
||||
transparency using binary masks but is unable to store 8-bit transparency
|
||||
information as part of the image itself. But img2pdf will always be lossless
|
||||
and thus, input images must not carry transparency information.
|
||||
|
||||
- img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
|
||||
input if necessary. To prevent decompression bomb denial of service attacks,
|
||||
|
@ -117,9 +117,10 @@ You can then test the converter using:
|
|||
|
||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||
|
||||
If you don't want to setup Python on Windows, then head to the
|
||||
[releases](/josch/img2pdf/releases) section and download the latest
|
||||
`img2pdf.exe`.
|
||||
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
||||
appveyor. If you don't want to install Python before using img2pdf you can head
|
||||
to appveyor and click on "Artifacts" to download the latest version:
|
||||
https://ci.appveyor.com/project/josch/img2pdf
|
||||
|
||||
GUI
|
||||
---
|
||||
|
@ -146,10 +147,6 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
f1.write(img2pdf.convert(f2))
|
||||
|
||||
# opening using pathlib
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
||||
|
||||
# using in-memory image data
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert("\x89PNG..."))
|
||||
|
@ -192,15 +189,6 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||
|
||||
# convert all files matching a glob using pathlib.Path
|
||||
from pathlib import Path
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
||||
|
||||
# ignore invalid rotation values in the input images
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid))
|
||||
|
||||
# writing to file descriptor
|
||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
img2pdf.convert(f2, outputstream=f1)
|
||||
|
@ -308,14 +296,3 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
|||
input to RGB and removes the alpha channel from images with transparency. For
|
||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||
|
||||
Comparison to econvert from ExactImage
|
||||
--------------------------------------
|
||||
|
||||
Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
|
||||
directly without re-encoding but when given other file formats, it stores them
|
||||
just using flate compression, which unnecessarily increases the filesize.
|
||||
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
||||
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
||||
to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
|
||||
also seems to choose JPEG encoding when using it on some file types (like
|
||||
palette images) making it again not lossless for that input as well.
|
||||
|
|
10
appveyor.yml
10
appveyor.yml
|
@ -16,18 +16,16 @@ environment:
|
|||
- PYTHON: "C:\\Python37-x64"
|
||||
|
||||
install:
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow"
|
||||
- "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller"
|
||||
|
||||
build: off
|
||||
|
||||
# don't run tests on windows because we don't have imagemagick
|
||||
#test_script:
|
||||
# - "%PYTHON%\\python.exe -m tox"
|
||||
test_script:
|
||||
- "%PYTHON%\\python.exe -m tox"
|
||||
|
||||
after_test:
|
||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
||||
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
||||
|
||||
artifacts:
|
||||
- path: dist\*
|
||||
|
|
2
setup.cfg
Normal file
2
setup.cfg
Normal file
|
@ -0,0 +1,2 @@
|
|||
[metadata]
|
||||
description-file = README.md
|
4
setup.py
4
setup.py
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
from setuptools import setup
|
||||
|
||||
VERSION = "0.5.1"
|
||||
VERSION = "0.4.1"
|
||||
|
||||
INSTALL_REQUIRES = (
|
||||
"Pillow",
|
||||
|
@ -11,7 +11,7 @@ INSTALL_REQUIRES = (
|
|||
setup(
|
||||
name="img2pdf",
|
||||
version=VERSION,
|
||||
author="Johannes Schauer Marin Rodrigues",
|
||||
author="Johannes 'josch' Schauer",
|
||||
author_email="josch@mister-muffin.de",
|
||||
description="Convert images to PDF via direct JPEG inclusion.",
|
||||
long_description=open("README.md").read(),
|
||||
|
|
901
src/img2pdf.py
901
src/img2pdf.py
File diff suppressed because it is too large
Load diff
1879
src/img2pdf_test.py
1879
src/img2pdf_test.py
File diff suppressed because it is too large
Load diff
57
src/jp2.py
57
src/jp2.py
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (C) 2013 Johannes Schauer Marin Rodrigues <j.schauer at email.de>
|
||||
# Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
|
||||
#
|
||||
# this module is heavily based upon jpylyzer which is
|
||||
# KB / National Library of the Netherlands, Open Planets Foundation
|
||||
|
@ -37,8 +37,9 @@ def getBox(data, byteStart, noBytes):
|
|||
|
||||
|
||||
def parse_ihdr(data):
|
||||
height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
|
||||
return width, height, channels, bpp + 1
|
||||
height = struct.unpack(">I", data[0:4])[0]
|
||||
width = struct.unpack(">I", data[4:8])[0]
|
||||
return width, height
|
||||
|
||||
|
||||
def parse_colr(data):
|
||||
|
@ -58,8 +59,8 @@ def parse_colr(data):
|
|||
|
||||
def parse_resc(data):
|
||||
hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
|
||||
hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
|
||||
vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
|
||||
hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
|
||||
vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
|
||||
return hdpi, vdpi
|
||||
|
||||
|
||||
|
@ -84,13 +85,13 @@ def parse_jp2h(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"ihdr":
|
||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||
width, height = parse_ihdr(boxContents)
|
||||
elif boxType == b"colr":
|
||||
colorspace = parse_colr(boxContents)
|
||||
elif boxType == b"res ":
|
||||
hdpi, vdpi = parse_res(boxContents)
|
||||
byteStart = byteEnd
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
|
||||
|
||||
def parsejp2(data):
|
||||
|
@ -101,9 +102,7 @@ def parsejp2(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"jp2h":
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||
boxContents
|
||||
)
|
||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||
break
|
||||
byteStart = byteEnd
|
||||
if not width:
|
||||
|
@ -113,41 +112,13 @@ def parsejp2(data):
|
|||
if not colorspace:
|
||||
raise Exception("no colorspace in jp2 header")
|
||||
# retrieving the dpi is optional so we do not error out if not present
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsej2k(data):
|
||||
lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
|
||||
">HHIIIIIIIIH", data[4:42]
|
||||
)
|
||||
ssiz = [None] * csiz
|
||||
xrsiz = [None] * csiz
|
||||
yrsiz = [None] * csiz
|
||||
for i in range(csiz):
|
||||
ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack(
|
||||
"BBB", data[42 + 3 * i : 42 + 3 * (i + 1)]
|
||||
)
|
||||
assert ssiz == [7, 7, 7]
|
||||
return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||
|
||||
|
||||
def parse(data):
|
||||
if data[:4] == b"\xff\x4f\xff\x51":
|
||||
return parsej2k(data)
|
||||
else:
|
||||
return parsejp2(data)
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse(
|
||||
open(sys.argv[1], "rb").read()
|
||||
)
|
||||
print("width = %d" % width)
|
||||
print("height = %d" % height)
|
||||
print("colorspace = %s" % colorspace)
|
||||
print("hdpi = %s" % hdpi)
|
||||
print("vdpi = %s" % vdpi)
|
||||
print("channels = %s" % channels)
|
||||
print("bpp = %s" % bpp)
|
||||
width, height, colorspace = parsejp2(open(sys.argv[1]).read())
|
||||
sys.stdout.write("width = %d" % width)
|
||||
sys.stdout.write("height = %d" % height)
|
||||
sys.stdout.write("colorspace = %s" % colorspace)
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
Binary file not shown.
2
tox.ini
2
tox.ini
|
@ -4,7 +4,7 @@
|
|||
# and then run "tox" from this directory.
|
||||
|
||||
[tox]
|
||||
envlist = py37, py38, py39, py310
|
||||
envlist = py35, py36, py37, py38, py39
|
||||
skip_missing_interpreters = true
|
||||
|
||||
[testenv]
|
||||
|
|
Loading…
Reference in a new issue