forked from josch/img2pdf
Compare commits
52 commits
Author | SHA1 | Date | |
---|---|---|---|
819b366bf5 | |||
cc8c708295 | |||
fb9537d8b7 | |||
7678435eb7 | |||
ba7a360866 | |||
7f0bf47ff3 | |||
|
5cd0918d50 | ||
|
f157ced05d | ||
09064e8e70 | |||
2f736d7891 | |||
e05580a49a | |||
acc25a4926 | |||
f597887088 | |||
3e832fbcc2 | |||
1e8557cef1 | |||
29921eeabd | |||
33139612f8 | |||
64d27f4a8b | |||
85cbe1d128 | |||
b25429a4c1 | |||
c703e9df06 | |||
79e9985f35 | |||
cb2644c34f | |||
81502f21af | |||
0cbcb8fa12 | |||
e9e04b6dd9 | |||
fc059ee471 | |||
25466113e9 | |||
7405635b72 | |||
aea472101b | |||
7fa67bb337 | |||
7d40569aa1 | |||
83f9c32328 | |||
be8369373f | |||
10c6901fa3 | |||
57d7e07e6b | |||
272fe0433f | |||
ef7b9e739d | |||
af6fe27d53 | |||
bad6fcae39 | |||
d9b90499f3 | |||
edb0d29a14 | |||
bb3e8b0098 | |||
f454ebc6a6 | |||
c3db273e23 | |||
87afabd3cf | |||
|
5045282cc2 | ||
fb4b96452a | |||
c553e169a4 | |||
d9345ac767 | |||
1d52530229 | |||
3b117e674b |
9 changed files with 1304 additions and 282 deletions
3
.mailmap
Normal file
3
.mailmap
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
||||||
|
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
||||||
|
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
23
CHANGES.rst
23
CHANGES.rst
|
@ -2,6 +2,29 @@
|
||||||
CHANGES
|
CHANGES
|
||||||
=======
|
=======
|
||||||
|
|
||||||
|
0.5.1 (2023-11-26)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- no default ICC profile location for PDF/A-1b on Windows
|
||||||
|
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||||
|
|
||||||
|
0.5.0 (2023-10-28)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- support MIFF for 16 bit CMYK input
|
||||||
|
- accept pathlib.Path objects as input
|
||||||
|
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||||
|
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||||
|
be used if you want them
|
||||||
|
- support for pikepdf (>= 6.2.0)
|
||||||
|
|
||||||
|
0.4.4 (2022-04-07)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
- --viewer-page-layout support for twopageright and twopageleft
|
||||||
|
- Add B and JB paper sizes
|
||||||
|
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
||||||
|
|
||||||
0.4.3 (2021-10-24)
|
0.4.3 (2021-10-24)
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
|
39
HACKING
39
HACKING
|
@ -27,6 +27,41 @@ Making a new release
|
||||||
|
|
||||||
- Build and upload to pypi:
|
- Build and upload to pypi:
|
||||||
|
|
||||||
$ rm dist/*
|
$ rm -rf dist/*
|
||||||
$ python3 setup.py sdist
|
$ python3 setup.py sdist
|
||||||
$ twine upload --sign dist/*
|
$ twine upload dist/*
|
||||||
|
|
||||||
|
Using debbisect to find regressions
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||||
|
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||||
|
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||||
|
--verbose 2023-09-16 2023-10-24 \
|
||||||
|
'chroot "$1" sh -c "
|
||||||
|
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||||
|
&& cd img2pdf
|
||||||
|
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||||
|
|
||||||
|
Using debbisect cache
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||||
|
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||||
|
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||||
|
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||||
|
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||||
|
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||||
|
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||||
|
--chrooted-customize-hook=bash
|
||||||
|
unstable /dev/null
|
||||||
|
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||||
|
|
||||||
|
Bisecting imagemagick
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
$ git clean -fdx && git reset --hard
|
||||||
|
$ ./configure --prefix=$(pwd)/prefix
|
||||||
|
$ make -j$(nproc)
|
||||||
|
$ make install
|
||||||
|
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||||
|
|
50
README.md
50
README.md
|
@ -27,15 +27,15 @@ software, because the raw pixel data never has to be loaded into memory.
|
||||||
The following table shows how img2pdf handles different input depending on the
|
The following table shows how img2pdf handles different input depending on the
|
||||||
input file format and image color space.
|
input file format and image color space.
|
||||||
|
|
||||||
| Format | Colorspace | Result |
|
| Format | Colorspace | Result |
|
||||||
| -------------------- | ------------------------------ | ------------- |
|
| ------------------------------------- | ------------------------------ | ------------- |
|
||||||
| JPEG | any | direct |
|
| JPEG | any | direct |
|
||||||
| JPEG2000 | any | direct |
|
| JPEG2000 | any | direct |
|
||||||
| PNG (non-interlaced) | any | direct |
|
| PNG (non-interlaced, no transparency) | any | direct |
|
||||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||||
| any | any except CMYK and monochrome | PNG Paeth |
|
| any | any except CMYK and monochrome | PNG Paeth |
|
||||||
| any | monochrome | CCITT Group 4 |
|
| any | monochrome | CCITT Group 4 |
|
||||||
| any | CMYK | flate |
|
| any | CMYK | flate |
|
||||||
|
|
||||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||||
|
@ -72,11 +72,6 @@ Bugs
|
||||||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||||
please contact me.
|
please contact me.
|
||||||
|
|
||||||
- I have not yet figured out how to determine the colorspace of JPEG2000
|
|
||||||
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
|
||||||
files with other colorspaces, you must explicitly specify it using the
|
|
||||||
`--colorspace` option.
|
|
||||||
|
|
||||||
- An error is produced if the input image is broken. This commonly happens if
|
- An error is produced if the input image is broken. This commonly happens if
|
||||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||||
only nine different values from 1 to 9 are permitted, Android phones and
|
only nine different values from 1 to 9 are permitted, Android phones and
|
||||||
|
@ -122,10 +117,9 @@ You can then test the converter using:
|
||||||
|
|
||||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||||
|
|
||||||
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
If you don't want to set up Python on Windows, then head to the
|
||||||
appveyor. If you don't want to install Python before using img2pdf you can head
|
[releases](/josch/img2pdf/releases) section and download the latest
|
||||||
to appveyor and click on "Artifacts" to download the latest version:
|
`img2pdf.exe`.
|
||||||
https://ci.appveyor.com/project/josch/img2pdf
|
|
||||||
|
|
||||||
GUI
|
GUI
|
||||||
---
|
---
|
||||||
|
@ -152,6 +146,10 @@ The package can also be used as a library:
|
||||||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||||
f1.write(img2pdf.convert(f2))
|
f1.write(img2pdf.convert(f2))
|
||||||
|
|
||||||
|
# opening using pathlib
|
||||||
|
with open("name.pdf","wb") as f:
|
||||||
|
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
||||||
|
|
||||||
# using in-memory image data
|
# using in-memory image data
|
||||||
with open("name.pdf","wb") as f:
|
with open("name.pdf","wb") as f:
|
||||||
f.write(img2pdf.convert("\x89PNG...")
|
f.write(img2pdf.convert("\x89PNG...")
|
||||||
|
@ -194,6 +192,11 @@ The package can also be used as a library:
|
||||||
with open("name.pdf","wb") as f:
|
with open("name.pdf","wb") as f:
|
||||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||||
|
|
||||||
|
# convert all files matching a glob using pathlib.Path
|
||||||
|
from pathlib import Path
|
||||||
|
with open("name.pdf","wb") as f:
|
||||||
|
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
||||||
|
|
||||||
# ignore invalid rotation values in the input images
|
# ignore invalid rotation values in the input images
|
||||||
with open("name.pdf","wb") as f:
|
with open("name.pdf","wb") as f:
|
||||||
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
|
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
|
||||||
|
@ -305,3 +308,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
||||||
input to RGB and removes the alpha channel from images with transparency. For
|
input to RGB and removes the alpha channel from images with transparency. For
|
||||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||||
|
|
||||||
|
Comparison to econvert from ExactImage
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
|
||||||
|
directly without re-encoding but when given other file formats, it stores them
|
||||||
|
just using flate compression, which unnecessarily increases the filesize.
|
||||||
|
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
||||||
|
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
||||||
|
to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
|
||||||
|
also seems to choose JPEG encoding when using it on some file types (like
|
||||||
|
palette images) making it again not lossless for that input as well.
|
||||||
|
|
|
@ -26,7 +26,8 @@ build: off
|
||||||
|
|
||||||
after_test:
|
after_test:
|
||||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
||||||
|
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
||||||
|
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: dist\*
|
- path: dist\*
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -1,7 +1,7 @@
|
||||||
import sys
|
import sys
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
VERSION = "0.4.3"
|
VERSION = "0.5.1"
|
||||||
|
|
||||||
INSTALL_REQUIRES = (
|
INSTALL_REQUIRES = (
|
||||||
"Pillow",
|
"Pillow",
|
||||||
|
|
665
src/img2pdf.py
665
src/img2pdf.py
|
@ -22,12 +22,22 @@ import sys
|
||||||
import os
|
import os
|
||||||
import zlib
|
import zlib
|
||||||
import argparse
|
import argparse
|
||||||
from PIL import Image, TiffImagePlugin
|
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
|
||||||
|
|
||||||
|
if hasattr(GifImagePlugin, "LoadingStrategy"):
|
||||||
|
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
|
||||||
|
# P to make sure that more than 256 colors can be represented. But palette
|
||||||
|
# images compress far better than RGB images in PDF so we instruct Pillow
|
||||||
|
# to only emit RGB frames if the palette differs and return P otherwise.
|
||||||
|
# This works since Pillow 9.1.0.
|
||||||
|
GifImagePlugin.LOADING_STRATEGY = (
|
||||||
|
GifImagePlugin.LoadingStrategy.RGB_AFTER_DIFFERENT_PALETTE_ONLY
|
||||||
|
)
|
||||||
|
|
||||||
# TiffImagePlugin.DEBUG = True
|
# TiffImagePlugin.DEBUG = True
|
||||||
from PIL.ExifTags import TAGS
|
from PIL.ExifTags import TAGS
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from jp2 import parsejp2
|
import jp2
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import logging
|
import logging
|
||||||
|
@ -35,6 +45,8 @@ import struct
|
||||||
import platform
|
import platform
|
||||||
import hashlib
|
import hashlib
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
import re
|
||||||
|
import io
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -50,7 +62,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
have_pikepdf = False
|
have_pikepdf = False
|
||||||
|
|
||||||
__version__ = "0.4.3"
|
__version__ = "0.5.1"
|
||||||
default_dpi = 96.0
|
default_dpi = 96.0
|
||||||
papersizes = {
|
papersizes = {
|
||||||
"letter": "8.5inx11in",
|
"letter": "8.5inx11in",
|
||||||
|
@ -61,6 +73,20 @@ papersizes = {
|
||||||
"a4": "210mmx297mm",
|
"a4": "210mmx297mm",
|
||||||
"a5": "148mmx210mm",
|
"a5": "148mmx210mm",
|
||||||
"a6": "105mmx148mm",
|
"a6": "105mmx148mm",
|
||||||
|
"b0": "1000mmx1414mm",
|
||||||
|
"b1": "707mmx1000mm",
|
||||||
|
"b2": "500mmx707mm",
|
||||||
|
"b3": "353mmx500mm",
|
||||||
|
"b4": "250mmx353mm",
|
||||||
|
"b5": "176mmx250mm",
|
||||||
|
"b6": "125mmx176mm",
|
||||||
|
"jb0": "1030mmx1456mm",
|
||||||
|
"jb1": "728mmx1030mm",
|
||||||
|
"jb2": "515mmx728mm",
|
||||||
|
"jb3": "364mmx515mm",
|
||||||
|
"jb4": "257mmx364mm",
|
||||||
|
"jb5": "182mmx257mm",
|
||||||
|
"jb6": "128mmx182mm",
|
||||||
"legal": "8.5inx14in",
|
"legal": "8.5inx14in",
|
||||||
"tabloid": "11inx17in",
|
"tabloid": "11inx17in",
|
||||||
}
|
}
|
||||||
|
@ -73,6 +99,20 @@ papernames = {
|
||||||
"a4": "A4",
|
"a4": "A4",
|
||||||
"a5": "A5",
|
"a5": "A5",
|
||||||
"a6": "A6",
|
"a6": "A6",
|
||||||
|
"b0": "B0",
|
||||||
|
"b1": "B1",
|
||||||
|
"b2": "B2",
|
||||||
|
"b3": "B3",
|
||||||
|
"b4": "B4",
|
||||||
|
"b5": "B5",
|
||||||
|
"b6": "B6",
|
||||||
|
"jb0": "JB0",
|
||||||
|
"jb1": "JB1",
|
||||||
|
"jb2": "JB2",
|
||||||
|
"jb3": "JB3",
|
||||||
|
"jb4": "JB4",
|
||||||
|
"jb5": "JB5",
|
||||||
|
"jb6": "JB6",
|
||||||
"legal": "Legal",
|
"legal": "Legal",
|
||||||
"tabloid": "Tabloid",
|
"tabloid": "Tabloid",
|
||||||
}
|
}
|
||||||
|
@ -87,11 +127,16 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
||||||
|
|
||||||
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
||||||
|
|
||||||
ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
|
ImageFormat = Enum(
|
||||||
|
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
||||||
|
)
|
||||||
|
|
||||||
PageMode = Enum("PageMode", "none outlines thumbs")
|
PageMode = Enum("PageMode", "none outlines thumbs")
|
||||||
|
|
||||||
PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft twopageright twopageleft")
|
PageLayout = Enum(
|
||||||
|
"PageLayout",
|
||||||
|
"single onecolumn twocolumnright twocolumnleft twopageright twopageleft",
|
||||||
|
)
|
||||||
|
|
||||||
Magnification = Enum("Magnification", "fit fith fitbh")
|
Magnification = Enum("Magnification", "fit fith fitbh")
|
||||||
|
|
||||||
|
@ -389,6 +434,28 @@ class ExifOrientationError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# temporarily change the attribute of an object using a context manager
|
||||||
|
class temp_attr:
|
||||||
|
def __init__(self, obj, field, value):
|
||||||
|
self.obj = obj
|
||||||
|
self.field = field
|
||||||
|
self.value = value
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.exists = False
|
||||||
|
if hasattr(self.obj, self.field):
|
||||||
|
self.exists = True
|
||||||
|
self.old_value = getattr(self.obj, self.field)
|
||||||
|
logger.debug(f"setting {self.obj}.{self.field} = {self.value}")
|
||||||
|
setattr(self.obj, self.field, self.value)
|
||||||
|
|
||||||
|
def __exit__(self, exctype, excinst, exctb):
|
||||||
|
if self.exists:
|
||||||
|
setattr(self.obj, self.field, self.old_value)
|
||||||
|
else:
|
||||||
|
delattr(self.obj, self.field)
|
||||||
|
|
||||||
|
|
||||||
# without pdfrw this function is a no-op
|
# without pdfrw this function is a no-op
|
||||||
def my_convert_load(string):
|
def my_convert_load(string):
|
||||||
return string
|
return string
|
||||||
|
@ -655,7 +722,7 @@ class pdfdoc(object):
|
||||||
self.writer.docinfo = PdfDict(indirect=True)
|
self.writer.docinfo = PdfDict(indirect=True)
|
||||||
|
|
||||||
def datetime_to_pdfdate(dt):
|
def datetime_to_pdfdate(dt):
|
||||||
return dt.strftime("%Y%m%d%H%M%SZ")
|
return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
|
||||||
|
|
||||||
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
|
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
|
||||||
v = locals()[k.lower()]
|
v = locals()[k.lower()]
|
||||||
|
@ -665,7 +732,7 @@ class pdfdoc(object):
|
||||||
v = PdfString.encode(v)
|
v = PdfString.encode(v)
|
||||||
self.writer.docinfo[getattr(PdfName, k)] = v
|
self.writer.docinfo[getattr(PdfName, k)] = v
|
||||||
|
|
||||||
now = datetime.now()
|
now = datetime.now().astimezone()
|
||||||
for k in ["CreationDate", "ModDate"]:
|
for k in ["CreationDate", "ModDate"]:
|
||||||
v = locals()[k.lower()]
|
v = locals()[k.lower()]
|
||||||
if v is None and nodate:
|
if v is None and nodate:
|
||||||
|
@ -685,7 +752,7 @@ class pdfdoc(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
def datetime_to_xmpdate(dt):
|
def datetime_to_xmpdate(dt):
|
||||||
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||||
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
|
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
|
||||||
|
@ -760,8 +827,10 @@ class pdfdoc(object):
|
||||||
artborder=None,
|
artborder=None,
|
||||||
iccp=None,
|
iccp=None,
|
||||||
):
|
):
|
||||||
assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
|
assert (
|
||||||
imgformat == ImageFormat.PNG and smaskdata is not None
|
color not in [Colorspace.RGBA, Colorspace.LA]
|
||||||
|
or (imgformat == ImageFormat.PNG and smaskdata is not None)
|
||||||
|
or imgformat == ImageFormat.JPEG2000
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
|
@ -785,7 +854,13 @@ class pdfdoc(object):
|
||||||
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
||||||
colorspace = PdfName.DeviceGray
|
colorspace = PdfName.DeviceGray
|
||||||
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||||
colorspace = PdfName.DeviceRGB
|
if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
|
||||||
|
# there is no DeviceRGBA and for JPXDecode it is okay to have
|
||||||
|
# no colorspace as the pdf reader is supposed to get this info
|
||||||
|
# from the jpeg2000 payload itself
|
||||||
|
colorspace = None
|
||||||
|
else:
|
||||||
|
colorspace = PdfName.DeviceRGB
|
||||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||||
colorspace = PdfName.DeviceCMYK
|
colorspace = PdfName.DeviceCMYK
|
||||||
elif color == Colorspace.P:
|
elif color == Colorspace.P:
|
||||||
|
@ -856,7 +931,8 @@ class pdfdoc(object):
|
||||||
image[PdfName.Filter] = ofilter
|
image[PdfName.Filter] = ofilter
|
||||||
image[PdfName.Width] = imgwidthpx
|
image[PdfName.Width] = imgwidthpx
|
||||||
image[PdfName.Height] = imgheightpx
|
image[PdfName.Height] = imgheightpx
|
||||||
image[PdfName.ColorSpace] = colorspace
|
if colorspace is not None:
|
||||||
|
image[PdfName.ColorSpace] = colorspace
|
||||||
image[PdfName.BitsPerComponent] = depth
|
image[PdfName.BitsPerComponent] = depth
|
||||||
|
|
||||||
smask = None
|
smask = None
|
||||||
|
@ -1106,9 +1182,17 @@ class pdfdoc(object):
|
||||||
[initial_page, PdfName.XYZ, NullObject, NullObject, 0]
|
[initial_page, PdfName.XYZ, NullObject, NullObject, 0]
|
||||||
)
|
)
|
||||||
|
|
||||||
# the /OpenAction array must contain the page as an indirect object
|
# The /OpenAction array must contain the page as an indirect object.
|
||||||
|
# This changed some time after 4.2.0 and on or before 5.0.0 and current
|
||||||
|
# versions require to use .obj or otherwise we get:
|
||||||
|
# TypeError: Can't convert ObjectHelper (or subclass) to Object
|
||||||
|
# implicitly. Use .obj to get access the underlying object.
|
||||||
|
# See https://github.com/pikepdf/pikepdf/issues/313 for details.
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
initial_page = self.writer.make_indirect(initial_page)
|
if isinstance(initial_page, pikepdf.Page):
|
||||||
|
initial_page = self.writer.make_indirect(initial_page.obj)
|
||||||
|
else:
|
||||||
|
initial_page = self.writer.make_indirect(initial_page)
|
||||||
|
|
||||||
if self.magnification == Magnification.fit:
|
if self.magnification == Magnification.fit:
|
||||||
catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
|
catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
|
||||||
|
@ -1185,8 +1269,11 @@ class pdfdoc(object):
|
||||||
|
|
||||||
# now write out the PDF
|
# now write out the PDF
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
|
kwargs = {}
|
||||||
|
if pikepdf.__version__ >= "6.2.0":
|
||||||
|
kwargs["deterministic_id"] = True
|
||||||
self.writer.save(
|
self.writer.save(
|
||||||
outputstream, min_version=self.output_version, linearize=True
|
outputstream, min_version=self.output_version, linearize=True, **kwargs
|
||||||
)
|
)
|
||||||
elif self.engine == Engine.pdfrw:
|
elif self.engine == Engine.pdfrw:
|
||||||
self.writer.trailer.Info = self.writer.docinfo
|
self.writer.trailer.Info = self.writer.docinfo
|
||||||
|
@ -1214,7 +1301,7 @@ def get_imgmetadata(
|
||||||
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
||||||
# this codepath gets called if the PIL installation is not able to
|
# this codepath gets called if the PIL installation is not able to
|
||||||
# handle JPEG2000 files
|
# handle JPEG2000 files
|
||||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
|
imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
|
||||||
|
|
||||||
if hdpi is None:
|
if hdpi is None:
|
||||||
hdpi = default_dpi
|
hdpi = default_dpi
|
||||||
|
@ -1224,7 +1311,19 @@ def get_imgmetadata(
|
||||||
else:
|
else:
|
||||||
imgwidthpx, imgheightpx = imgdata.size
|
imgwidthpx, imgheightpx = imgdata.size
|
||||||
|
|
||||||
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
|
ndpi = imgdata.info.get("dpi")
|
||||||
|
if ndpi is None:
|
||||||
|
# the PNG plugin of PIL adds the undocumented "aspect" field instead of
|
||||||
|
# the "dpi" field if the PNG pHYs chunk unit is not set to meters
|
||||||
|
if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
|
||||||
|
aspect = imgdata.info["aspect"]
|
||||||
|
# make sure not to go below the default dpi
|
||||||
|
if aspect[0] > aspect[1]:
|
||||||
|
ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
|
||||||
|
else:
|
||||||
|
ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
|
||||||
|
else:
|
||||||
|
ndpi = (default_dpi, default_dpi)
|
||||||
# In python3, the returned dpi value for some tiff images will
|
# In python3, the returned dpi value for some tiff images will
|
||||||
# not be an integer but a float. To make the behaviour of
|
# not be an integer but a float. To make the behaviour of
|
||||||
# img2pdf the same between python2 and python3, we convert that
|
# img2pdf the same between python2 and python3, we convert that
|
||||||
|
@ -1234,7 +1333,7 @@ def get_imgmetadata(
|
||||||
ics = imgdata.mode
|
ics = imgdata.mode
|
||||||
|
|
||||||
# GIF and PNG files with transparency are supported
|
# GIF and PNG files with transparency are supported
|
||||||
if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
|
if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
|
||||||
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
||||||
):
|
):
|
||||||
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
||||||
|
@ -1244,6 +1343,10 @@ def get_imgmetadata(
|
||||||
if depth > 8:
|
if depth > 8:
|
||||||
logger.warning("Image with transparency and a bit depth of %d." % depth)
|
logger.warning("Image with transparency and a bit depth of %d." % depth)
|
||||||
logger.warning("This is unsupported due to PIL limitations.")
|
logger.warning("This is unsupported due to PIL limitations.")
|
||||||
|
logger.warning(
|
||||||
|
"If you accept a lossy conversion, you can manually convert "
|
||||||
|
"your images to 8 bit using `convert -depth 8` from imagemagick"
|
||||||
|
)
|
||||||
raise AlphaChannelError(
|
raise AlphaChannelError(
|
||||||
"Refusing to work with multiple >8bit channels."
|
"Refusing to work with multiple >8bit channels."
|
||||||
)
|
)
|
||||||
|
@ -1354,6 +1457,53 @@ def get_imgmetadata(
|
||||||
iccp = None
|
iccp = None
|
||||||
if "icc_profile" in imgdata.info:
|
if "icc_profile" in imgdata.info:
|
||||||
iccp = imgdata.info.get("icc_profile")
|
iccp = imgdata.info.get("icc_profile")
|
||||||
|
# GIMP saves bilevel TIFF images and palette PNG images with only black and
|
||||||
|
# white in the palette with an RGB ICC profile which is useless
|
||||||
|
# https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
|
||||||
|
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||||
|
# imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
|
||||||
|
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
|
||||||
|
if iccp is not None and (
|
||||||
|
(color == Colorspace["1"] and imgformat == ImageFormat.TIFF)
|
||||||
|
or (
|
||||||
|
imgformat == ImageFormat.PNG
|
||||||
|
and color == Colorspace.P
|
||||||
|
and rawdata is not None
|
||||||
|
and parse_png(rawdata)[1]
|
||||||
|
in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"]
|
||||||
|
)
|
||||||
|
):
|
||||||
|
with io.BytesIO(iccp) as f:
|
||||||
|
prf = ImageCms.ImageCmsProfile(f)
|
||||||
|
if (
|
||||||
|
prf.profile.model == "sRGB"
|
||||||
|
and prf.profile.manufacturer == "GIMP"
|
||||||
|
and prf.profile.profile_description == "GIMP built-in sRGB"
|
||||||
|
):
|
||||||
|
if imgformat == ImageFormat.TIFF:
|
||||||
|
logger.warning(
|
||||||
|
"Ignoring RGB ICC profile in bilevel TIFF produced by GIMP."
|
||||||
|
)
|
||||||
|
elif imgformat == ImageFormat.PNG:
|
||||||
|
logger.warning(
|
||||||
|
"Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP."
|
||||||
|
)
|
||||||
|
logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
|
||||||
|
iccp = None
|
||||||
|
# SmartAlbums old version (found 2.2.6) exports JPG with only 1 component
|
||||||
|
# with an RGB ICC profile which is useless.
|
||||||
|
# This produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||||
|
# Update: Found another case, the JPG is created by Adobe PhotoShop, so we
|
||||||
|
# don't check software anymore.
|
||||||
|
if iccp is not None and (
|
||||||
|
(color == Colorspace["L"] and imgformat == ImageFormat.JPEG)
|
||||||
|
):
|
||||||
|
with io.BytesIO(iccp) as f:
|
||||||
|
prf = ImageCms.ImageCmsProfile(f)
|
||||||
|
|
||||||
|
if prf.profile.xcolor_space not in ("GRAY"):
|
||||||
|
logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
|
||||||
|
iccp = None
|
||||||
|
|
||||||
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
|
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
|
||||||
|
|
||||||
|
@ -1410,27 +1560,29 @@ def transcode_monochrome(imgdata):
|
||||||
# into putting everything into a single strip. Thanks to Andrew Murray for
|
# into putting everything into a single strip. Thanks to Andrew Murray for
|
||||||
# the hack.
|
# the hack.
|
||||||
#
|
#
|
||||||
# This can be dropped once this gets merged:
|
# Since version 8.4.0 Pillow allows us to modify the strip size explicitly
|
||||||
# https://github.com/python-pillow/Pillow/pull/5744
|
tmp_strip_size = (imgdata.size[0] + 7) // 8 * imgdata.size[1]
|
||||||
pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__
|
if hasattr(TiffImagePlugin, "STRIP_SIZE"):
|
||||||
|
# we are using Pillow 8.4.0 or later
|
||||||
|
with temp_attr(TiffImagePlugin, "STRIP_SIZE", tmp_strip_size):
|
||||||
|
im.save(newimgio, format="TIFF", compression="group4")
|
||||||
|
else:
|
||||||
|
# only needed for Pillow 8.3.x but works for versions before that as
|
||||||
|
# well
|
||||||
|
pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__
|
||||||
|
|
||||||
def __getitem__(self, tag):
|
def __getitem__(self, tag):
|
||||||
overrides = {
|
overrides = {
|
||||||
TiffImagePlugin.ROWSPERSTRIP: imgdata.size[1],
|
TiffImagePlugin.ROWSPERSTRIP: imgdata.size[1],
|
||||||
TiffImagePlugin.STRIPBYTECOUNTS: [
|
TiffImagePlugin.STRIPBYTECOUNTS: [tmp_strip_size],
|
||||||
(imgdata.size[0] + 7) // 8 * imgdata.size[1]
|
TiffImagePlugin.STRIPOFFSETS: [0],
|
||||||
],
|
}
|
||||||
TiffImagePlugin.STRIPOFFSETS: [0],
|
return overrides.get(tag, pillow__getitem__(self, tag))
|
||||||
}
|
|
||||||
return overrides.get(tag, pillow__getitem__(self, tag))
|
|
||||||
|
|
||||||
# use try/finally to make sure that __getitem__ is reset even if save()
|
with temp_attr(
|
||||||
# raises an exception
|
TiffImagePlugin.ImageFileDirectory_v2, "__getitem__", __getitem__
|
||||||
try:
|
):
|
||||||
TiffImagePlugin.ImageFileDirectory_v2.__getitem__ = __getitem__
|
im.save(newimgio, format="TIFF", compression="group4")
|
||||||
im.save(newimgio, format="TIFF", compression="group4")
|
|
||||||
finally:
|
|
||||||
TiffImagePlugin.ImageFileDirectory_v2.__getitem__ = pillow__getitem__
|
|
||||||
|
|
||||||
# Open new image in memory
|
# Open new image in memory
|
||||||
newimgio.seek(0)
|
newimgio.seek(0)
|
||||||
|
@ -1460,7 +1612,204 @@ def parse_png(rawdata):
|
||||||
return pngidat, palette
|
return pngidat, palette
|
||||||
|
|
||||||
|
|
||||||
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
miff_re = re.compile(
|
||||||
|
r"""
|
||||||
|
[^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
|
||||||
|
[^=]+ # the field name can even contain spaces
|
||||||
|
= # field name and value are separated by an equal sign
|
||||||
|
(?:
|
||||||
|
[^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
|
||||||
|
|{[^}]*} # or any kind of char surrounded by braces
|
||||||
|
)+""",
|
||||||
|
re.VERBOSE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# https://imagemagick.org/script/miff.php
|
||||||
|
# turn off black formatting until python 3.10 is available on more platforms
|
||||||
|
# and we can use match/case
|
||||||
|
# fmt: off
|
||||||
|
def parse_miff(data):
    """Parse MIFF (Magick Image File Format) data into a list of image tuples.

    ``data`` is a bytes object holding one or more concatenated MIFF images.
    Each image is a textual ``key=value`` header, terminated by a colon
    followed by a 0x1A byte, and then the uncompressed pixel data (preceded
    by the colormap for PseudoClass images).

    Returns a list with one tuple per image, in the same layout that
    read_images() returns: (colorspace, dpi, image format, image data, smask,
    width, height, palette, inverted, depth, rotation, icc profile).

    Raises ImageOpenError if the header is malformed, lacks a required field
    or the pixel data is shorter than the header announces.  ValueError is
    raised if the header terminator is missing or a numeric field cannot be
    converted.
    """
    results = []
    header, rest = data.split(b":\x1a", 1)
    header = header.decode("ISO-8859-1")
    if not header.lower().startswith("id=imagemagick"):
        raise ImageOpenError("MIFF header does not start with id=ImageMagick")

    hdata = {}
    for i, line in enumerate(re.findall(miff_re, header)):
        if not line:
            continue
        k, v = line.split("=", 1)
        key = k.lower()
        if i == 0 and (key != "id" or v.lower() != "imagemagick"):
            raise ImageOpenError("first MIFF header field must be id=ImageMagick")
        # Diagnostics go through the logger and never through print():
        # standard output may be carrying the generated PDF.
        if key == "class":
            if v in ("DirectClass", "PseudoClass"):
                hdata["class"] = v
            else:
                logger.warning("MIFF: cannot understand class %s", v)
        elif key == "colorspace":
            # theoretically RGBA and CMYKA should be supported as well
            if v in ("sRGB", "CMYK", "Gray"):
                hdata["colorspace"] = v
            else:
                logger.warning("MIFF: cannot understand colorspace %s", v)
        elif key == "depth":
            if v in ("8", "16", "32"):
                hdata["depth"] = int(v)
            else:
                logger.warning("MIFF: cannot understand depth %s", v)
        elif key == "colors":
            hdata["colors"] = int(v)
        elif key == "matte":
            if v == "True":
                hdata["matte"] = True
            elif v == "False":
                hdata["matte"] = False
            else:
                logger.warning("MIFF: cannot understand matte %s", v)
        elif key in ("columns", "rows"):
            hdata[key] = int(v)
        elif key == "compression":
            logger.warning("MIFF: compression not yet supported")
        elif key == "profile":
            if v not in ("icc", "exif"):
                raise ImageOpenError("MIFF: unsupported profile type: %s" % v)
            hdata["profile"] = v
        elif key == "resolution":
            dpix, dpiy = v.split("x", 1)
            hdata["resolution"] = (float(dpix), float(dpiy))

    # Explicit checks instead of assert statements: the input is untrusted
    # file content and asserts vanish when Python runs with -O.
    for required in ("class", "depth", "columns", "rows"):
        if required not in hdata:
            raise ImageOpenError("MIFF header lacks the %s field" % required)

    columns = hdata["columns"]
    rows = hdata["rows"]
    depth = hdata["depth"]
    dpi = hdata.get("resolution") or (default_dpi, default_dpi)

    if hdata["class"] == "DirectClass":
        if hdata.get("colors", 0) != 0:
            raise ImageOpenError("MIFF: DirectClass image must not have colors")
        channel_info = {
            "sRGB": (3, Colorspace.RGB),
            "CMYK": (4, Colorspace.CMYK),
            "Gray": (1, Colorspace.L),
        }
        if hdata.get("colorspace") not in channel_info:
            raise ImageOpenError("MIFF header lacks a supported colorspace field")
        numchannels, colorspace = channel_info[hdata["colorspace"]]
        if hdata.get("matte"):
            numchannels += 1
        if hdata.get("profile"):
            # there is no key encoding the length of icc or exif data
            # according to the docs, the profile-icc key is supposed to do
            # this, so the start of the pixel data cannot be located
            logger.warning(
                "MIFF: embedded %s profile is not supported, skipping image",
                hdata["profile"],
            )
            return results
        lenimgdata = depth // 8 * numchannels * columns * rows
        if len(rest) < lenimgdata:
            raise ImageOpenError(
                "MIFF: pixel data truncated: expected %d bytes but got %d"
                % (lenimgdata, len(rest))
            )
        if colorspace == Colorspace.RGB and depth == 8:
            # 8 bit RGB can be stored as PNG-predicted data
            newimg = Image.frombytes("RGB", (columns, rows), rest[:lenimgdata])
            imgdata, palette, png_depth = to_png_data(newimg)
            # internal invariants of to_png_data() for an RGB image
            assert palette == b""
            assert png_depth == depth
            imgfmt = ImageFormat.PNG
        else:
            imgdata = zlib.compress(rest[:lenimgdata])
            imgfmt = ImageFormat.MIFF
        results.append(
            (
                colorspace,
                dpi,
                imgfmt,
                imgdata,
                None,  # smask
                columns,
                rows,
                [],  # palette
                False,  # inverted
                depth,
                0,  # rotation
                None,  # icc profile
            )
        )
        consumed = lenimgdata
    else:
        # only "DirectClass" and "PseudoClass" are ever stored in hdata
        if "colors" not in hdata:
            raise ImageOpenError("MIFF: PseudoClass image lacks the colors field")
        numchannels = 2 if hdata.get("matte") else 1
        lenpal = 3 * hdata["colors"] * depth // 8
        lenimgdata = numchannels * rows * columns
        if len(rest) < lenpal + lenimgdata:
            raise ImageOpenError(
                "MIFF: pixel data truncated: expected %d bytes but got %d"
                % (lenpal + lenimgdata, len(rest))
            )
        results.append(
            (
                Colorspace.RGB,
                dpi,
                ImageFormat.MIFF,
                zlib.compress(rest[lenpal : lenpal + lenimgdata]),
                None,  # FIXME: allow alpha channel smask
                columns,
                rows,
                rest[:lenpal],  # palette
                False,  # inverted
                depth,
                0,  # rotation
                None,  # icc profile
            )
        )
        consumed = lenpal + lenimgdata

    # anything after the pixel data must be another MIFF image
    trailing = rest[consumed:]
    if trailing:
        if trailing[:14].lower() != b"id=imagemagick":
            raise ImageOpenError("MIFF: unexpected data after the image")
        results.extend(parse_miff(trailing))
    return results
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
|
|
||||||
|
def read_images(
|
||||||
|
rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
|
||||||
|
):
|
||||||
im = BytesIO(rawdata)
|
im = BytesIO(rawdata)
|
||||||
im.seek(0)
|
im.seek(0)
|
||||||
imgdata = None
|
imgdata = None
|
||||||
|
@ -1468,13 +1817,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
imgdata = Image.open(im)
|
imgdata = Image.open(im)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
# test if it is a jpeg2000 image
|
# test if it is a jpeg2000 image
|
||||||
if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||||
|
# image is jpeg2000
|
||||||
|
imgformat = ImageFormat.JPEG2000
|
||||||
|
if rawdata[:14].lower() == b"id=imagemagick":
|
||||||
|
# image is in MIFF format
|
||||||
|
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
||||||
|
# we need PIL but PIL cannot do 16 bit
|
||||||
|
imgformat = ImageFormat.MIFF
|
||||||
|
else:
|
||||||
raise ImageOpenError(
|
raise ImageOpenError(
|
||||||
"cannot read input image (not jpeg2000). "
|
"cannot read input image (not jpeg2000). "
|
||||||
"PIL: error reading image: %s" % e
|
"PIL: error reading image: %s" % e
|
||||||
)
|
)
|
||||||
# image is jpeg2000
|
|
||||||
imgformat = ImageFormat.JPEG2000
|
|
||||||
else:
|
else:
|
||||||
logger.debug("PIL format = %s", imgdata.format)
|
logger.debug("PIL format = %s", imgdata.format)
|
||||||
imgformat = None
|
imgformat = None
|
||||||
|
@ -1508,10 +1863,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
raise JpegColorspaceError("jpeg can't be monochrome")
|
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||||
if color == Colorspace["P"]:
|
if color == Colorspace["P"]:
|
||||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||||
if color == Colorspace["RGBA"]:
|
if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
|
||||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||||
logger.debug("read_images() embeds a JPEG")
|
logger.debug("read_images() embeds a JPEG")
|
||||||
cleanup()
|
cleanup()
|
||||||
|
depth = 8
|
||||||
|
if imgformat == ImageFormat.JPEG2000:
|
||||||
|
*_, depth = jp2.parse(rawdata)
|
||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
color,
|
color,
|
||||||
|
@ -1523,7 +1881,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
imgheightpx,
|
imgheightpx,
|
||||||
[],
|
[],
|
||||||
False,
|
False,
|
||||||
8,
|
depth,
|
||||||
rotation,
|
rotation,
|
||||||
iccp,
|
iccp,
|
||||||
)
|
)
|
||||||
|
@ -1540,6 +1898,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
if imgformat == ImageFormat.MPO:
|
if imgformat == ImageFormat.MPO:
|
||||||
result = []
|
result = []
|
||||||
img_page_count = 0
|
img_page_count = 0
|
||||||
|
assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002])
|
||||||
|
num_frames = len(imgdata.mpinfo[0xB002])
|
||||||
|
# An MPO file can be a main image together with one or more thumbnails
|
||||||
|
# if that is the case, then we only include all frames if the
|
||||||
|
# --include-thumbnails option is given. If it is not, such an MPO file
|
||||||
|
# will be embedded as is, so including its thumbnails but showing up
|
||||||
|
# as a single image page in the resulting PDF.
|
||||||
|
num_main_frames = 0
|
||||||
|
num_thumbnail_frames = 0
|
||||||
|
for i, mpent in enumerate(imgdata.mpinfo[0xB002]):
|
||||||
|
# check only the first frame for being the main image
|
||||||
|
if (
|
||||||
|
i == 0
|
||||||
|
and mpent["Attribute"]["DependentParentImageFlag"]
|
||||||
|
and not mpent["Attribute"]["DependentChildImageFlag"]
|
||||||
|
and mpent["Attribute"]["RepresentativeImageFlag"]
|
||||||
|
and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image"
|
||||||
|
):
|
||||||
|
num_main_frames += 1
|
||||||
|
elif (
|
||||||
|
not mpent["Attribute"]["DependentParentImageFlag"]
|
||||||
|
and mpent["Attribute"]["DependentChildImageFlag"]
|
||||||
|
and not mpent["Attribute"]["RepresentativeImageFlag"]
|
||||||
|
and mpent["Attribute"]["MPType"]
|
||||||
|
in [
|
||||||
|
"Large Thumbnail (VGA Equivalent)",
|
||||||
|
"Large Thumbnail (Full HD Equivalent)",
|
||||||
|
]
|
||||||
|
):
|
||||||
|
num_thumbnail_frames += 1
|
||||||
|
logger.debug(f"number of frames: {num_frames}")
|
||||||
|
logger.debug(f"number of main frames: {num_main_frames}")
|
||||||
|
logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}")
|
||||||
|
# this MPO file is a main image plus zero or more thumbnails
|
||||||
|
# embed as-is unless the --include-thumbnails option was given
|
||||||
|
if num_frames == 1 or (
|
||||||
|
not include_thumbnails
|
||||||
|
and num_main_frames == 1
|
||||||
|
and num_thumbnail_frames + 1 == num_frames
|
||||||
|
):
|
||||||
|
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||||
|
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
||||||
|
)
|
||||||
|
if color == Colorspace["1"]:
|
||||||
|
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||||
|
if color == Colorspace["P"]:
|
||||||
|
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||||
|
if color == Colorspace["RGBA"]:
|
||||||
|
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||||
|
logger.debug("read_images() embeds an MPO verbatim")
|
||||||
|
cleanup()
|
||||||
|
return [
|
||||||
|
(
|
||||||
|
color,
|
||||||
|
ndpi,
|
||||||
|
ImageFormat.JPEG,
|
||||||
|
rawdata,
|
||||||
|
None,
|
||||||
|
imgwidthpx,
|
||||||
|
imgheightpx,
|
||||||
|
[],
|
||||||
|
False,
|
||||||
|
8,
|
||||||
|
rotation,
|
||||||
|
iccp,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
# If the control flow reaches here, the MPO has more than a single
|
||||||
|
# frame but was not detected to be a main image followed by multiple
|
||||||
|
# thumbnails. We thus treat this MPO as we do other multi-frame images
|
||||||
|
# and include all its frames as individual pages.
|
||||||
for offset, mpent in zip(
|
for offset, mpent in zip(
|
||||||
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
|
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
|
||||||
):
|
):
|
||||||
|
@ -1637,6 +2066,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if imgformat == ImageFormat.MIFF:
|
||||||
|
return parse_miff(rawdata)
|
||||||
|
|
||||||
# If our input is not JPEG or PNG, then we might have a format that
|
# If our input is not JPEG or PNG, then we might have a format that
|
||||||
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
||||||
# iterate through all frames of the image.
|
# iterate through all frames of the image.
|
||||||
|
@ -1802,7 +2234,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if (
|
if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
||||||
|
# PDF does not support palette images with icc profile
|
||||||
|
if color == Colorspace.P:
|
||||||
|
newcolor = Colorspace.RGB
|
||||||
|
newimg = newimg.convert(mode="RGB")
|
||||||
|
elif color == Colorspace.PA:
|
||||||
|
newcolor = Colorspace.RGBA
|
||||||
|
newimg = newimg.convert(mode="RGBA")
|
||||||
|
smaskidat = None
|
||||||
|
elif (
|
||||||
color == Colorspace.RGBA
|
color == Colorspace.RGBA
|
||||||
or color == Colorspace.LA
|
or color == Colorspace.LA
|
||||||
or color == Colorspace.PA
|
or color == Colorspace.PA
|
||||||
|
@ -1816,25 +2257,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||||
newcolor = color
|
newcolor = color
|
||||||
l, a = newimg.split()
|
l, a = newimg.split()
|
||||||
newimg = l
|
newimg = l
|
||||||
|
elif color == Colorspace.PA or (
|
||||||
|
color == Colorspace.P and "transparency" in newimg.info
|
||||||
|
):
|
||||||
|
newcolor = color
|
||||||
|
a = newimg.convert(mode="RGBA").split()[-1]
|
||||||
else:
|
else:
|
||||||
newcolor = Colorspace.RGBA
|
newcolor = Colorspace.RGBA
|
||||||
r, g, b, a = newimg.convert(mode="RGBA").split()
|
r, g, b, a = newimg.convert(mode="RGBA").split()
|
||||||
newimg = Image.merge("RGB", (r, g, b))
|
newimg = Image.merge("RGB", (r, g, b))
|
||||||
|
|
||||||
smaskidat, _, _ = to_png_data(a)
|
smaskidat, *_ = to_png_data(a)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Image contains an alpha channel which will be stored "
|
"Image contains an alpha channel. Computing a separate "
|
||||||
"as a separate soft mask (/SMask) image in PDF."
|
"soft mask (/SMask) image to store transparency in PDF."
|
||||||
)
|
)
|
||||||
elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
|
||||||
# PDF does not support palette images with icc profile
|
|
||||||
if color == Colorspace.P:
|
|
||||||
newcolor = Colorspace.RGB
|
|
||||||
newimg = newimg.convert(mode="RGB")
|
|
||||||
elif color == Colorspace.PA:
|
|
||||||
newcolor = Colorspace.RGBA
|
|
||||||
newimg = newimg.convert(mode="RGBA")
|
|
||||||
smaskidat = None
|
|
||||||
else:
|
else:
|
||||||
newcolor = color
|
newcolor = color
|
||||||
smaskidat = None
|
smaskidat = None
|
||||||
|
@ -2176,7 +2613,6 @@ def find_scale(pagewidth, pageheight):
|
||||||
# as a binary string representing the image content or as filenames to the
|
# as a binary string representing the image content or as filenames to the
|
||||||
# images.
|
# images.
|
||||||
def convert(*images, **kwargs):
|
def convert(*images, **kwargs):
|
||||||
|
|
||||||
_default_kwargs = dict(
|
_default_kwargs = dict(
|
||||||
engine=None,
|
engine=None,
|
||||||
title=None,
|
title=None,
|
||||||
|
@ -2206,6 +2642,7 @@ def convert(*images, **kwargs):
|
||||||
artborder=None,
|
artborder=None,
|
||||||
pdfa=None,
|
pdfa=None,
|
||||||
rotation=None,
|
rotation=None,
|
||||||
|
include_thumbnails=False,
|
||||||
)
|
)
|
||||||
for kwname, default in _default_kwargs.items():
|
for kwname, default in _default_kwargs.items():
|
||||||
if kwname not in kwargs:
|
if kwname not in kwargs:
|
||||||
|
@ -2249,11 +2686,16 @@ def convert(*images, **kwargs):
|
||||||
for img in images:
|
for img in images:
|
||||||
# img is allowed to be a path, a binary string representing image data
|
# img is allowed to be a path, a binary string representing image data
|
||||||
# or a file-like object (really anything that implements read())
|
# or a file-like object (really anything that implements read())
|
||||||
try:
|
# or a pathlib.Path object (really anything that implements read_bytes())
|
||||||
rawdata = img.read()
|
rawdata = None
|
||||||
except AttributeError:
|
for fun in "read", "read_bytes":
|
||||||
|
try:
|
||||||
|
rawdata = getattr(img, fun)()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
if rawdata is None:
|
||||||
if not isinstance(img, (str, bytes)):
|
if not isinstance(img, (str, bytes)):
|
||||||
raise TypeError("Neither implements read() nor is str or bytes")
|
raise TypeError("Neither read(), read_bytes() nor is str or bytes")
|
||||||
# the thing doesn't have a read() function, so try if we can treat
|
# the thing doesn't have a read() function, so try if we can treat
|
||||||
# it as a file name
|
# it as a file name
|
||||||
try:
|
try:
|
||||||
|
@ -2271,6 +2713,10 @@ def convert(*images, **kwargs):
|
||||||
rawdata = f.read()
|
rawdata = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
# md5 = hashlib.md5(rawdata).hexdigest()
|
||||||
|
# with open("./testdata/" + md5, "wb") as f:
|
||||||
|
# f.write(rawdata)
|
||||||
|
|
||||||
for (
|
for (
|
||||||
color,
|
color,
|
||||||
ndpi,
|
ndpi,
|
||||||
|
@ -2289,6 +2735,7 @@ def convert(*images, **kwargs):
|
||||||
kwargs["colorspace"],
|
kwargs["colorspace"],
|
||||||
kwargs["first_frame_only"],
|
kwargs["first_frame_only"],
|
||||||
kwargs["rotation"],
|
kwargs["rotation"],
|
||||||
|
kwargs["include_thumbnails"],
|
||||||
):
|
):
|
||||||
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
|
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
|
||||||
imgwidthpx, imgheightpx, ndpi
|
imgwidthpx, imgheightpx, ndpi
|
||||||
|
@ -2664,7 +3111,7 @@ def valid_date(string):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
return parser.parse(string)
|
return parser.parse(string)
|
||||||
except TypeError:
|
except:
|
||||||
pass
|
pass
|
||||||
# as a last resort, try the local date utility
|
# as a last resort, try the local date utility
|
||||||
try:
|
try:
|
||||||
|
@ -2677,7 +3124,7 @@ def valid_date(string):
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
return datetime.utcfromtimestamp(int(utime))
|
return datetime.fromtimestamp(int(utime))
|
||||||
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
|
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3379,7 +3826,35 @@ def gui():
|
||||||
app.mainloop()
|
app.mainloop()
|
||||||
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def file_is_icc(fname):
    """Tell whether the file at ``fname`` carries the ICC profile signature.

    Only the first 40 bytes are read.  The file counts as an ICC profile if
    it is at least that long and bytes 36 to 39 spell ``acsp``.  Errors from
    opening or reading the file are not caught here.
    """
    with open(fname, "rb") as stream:
        head = stream.read(40)
    return len(head) == 40 and head[36:40] == b"acsp"
|
||||||
|
|
||||||
|
|
||||||
|
def validate_icc(fname):
    """argparse ``type`` callback that accepts only paths to ICC profiles.

    Returns ``fname`` unchanged if file_is_icc() accepts it.  Raises
    argparse.ArgumentTypeError if the file cannot be read or is not an ICC
    profile.
    """
    try:
        is_icc = file_is_icc(fname)
    except OSError as e:
        # argparse only turns ArgumentTypeError, TypeError and ValueError
        # into a usage error, so a missing or unreadable file would
        # otherwise end in a traceback
        raise argparse.ArgumentTypeError(
            'cannot read ICC profile "%s": %s' % (fname, e)
        ) from e
    if not is_icc:
        raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
    return fname
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_icc_profile():
    """Pick the sRGB ICC profile path used when --pdfa is given no argument.

    The known locations are probed in order and the first one that exists
    and passes file_is_icc() is returned.  If none qualifies, the first
    location is returned anyway.
    """
    candidates = (
        "/usr/share/color/icc/sRGB.icc",
        "/usr/share/color/icc/OpenICC/sRGB.icc",
        "/usr/share/color/icc/colord/sRGB.icc",
    )
    for candidate in candidates:
        if os.path.exists(candidate) and file_is_icc(candidate):
            return candidate
    return candidates[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_main_parser():
|
||||||
rendered_papersizes = ""
|
rendered_papersizes = ""
|
||||||
for k, v in sorted(papersizes.items()):
|
for k, v in sorted(papersizes.items()):
|
||||||
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
|
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
|
||||||
|
@ -3420,7 +3895,9 @@ Paper sizes:
|
||||||
the value in the second column has the same effect as giving the short hand
|
the value in the second column has the same effect as giving the short hand
|
||||||
in the first column. Appending ^T (a caret/circumflex followed by the letter
|
in the first column. Appending ^T (a caret/circumflex followed by the letter
|
||||||
T) turns the paper size from portrait into landscape. The postfix thus
|
T) turns the paper size from portrait into landscape. The postfix thus
|
||||||
symbolizes the transpose. The values are case insensitive.
|
symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
|
||||||
|
the escape character, so you need to put quotes around the option value.
|
||||||
|
The values are case insensitive.
|
||||||
|
|
||||||
%s
|
%s
|
||||||
|
|
||||||
|
@ -3487,7 +3964,7 @@ Examples:
|
||||||
while preserving its aspect ratio and a print border of 2 cm on the top and
|
while preserving its aspect ratio and a print border of 2 cm on the top and
|
||||||
bottom and 2.5 cm on the left and right hand side.
|
bottom and 2.5 cm on the left and right hand side.
|
||||||
|
|
||||||
$ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
|
$ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
|
||||||
|
|
||||||
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
|
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
|
||||||
original image size if the image is smaller than that.
|
original image size if the image is smaller than that.
|
||||||
|
@ -3622,6 +4099,17 @@ RGB.""",
|
||||||
"input image be converted into a page in the resulting PDF.",
|
"input image be converted into a page in the resulting PDF.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
outargs.add_argument(
|
||||||
|
"--include-thumbnails",
|
||||||
|
action="store_true",
|
||||||
|
help="Some multi-frame formats like MPO carry a main image and "
|
||||||
|
"one or more scaled-down copies of the main image (thumbnails). "
|
||||||
|
"In such a case, img2pdf will only include the main image and "
|
||||||
|
"not create additional pages for each of the thumbnails. If this "
|
||||||
|
"option is set, img2pdf will instead create one page per frame and "
|
||||||
|
"thus store each thumbnail on its own page.",
|
||||||
|
)
|
||||||
|
|
||||||
outargs.add_argument(
|
outargs.add_argument(
|
||||||
"--pillow-limit-break",
|
"--pillow-limit-break",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
@ -3633,14 +4121,29 @@ RGB.""",
|
||||||
% Image.MAX_IMAGE_PIXELS,
|
% Image.MAX_IMAGE_PIXELS,
|
||||||
)
|
)
|
||||||
|
|
||||||
outargs.add_argument(
|
if sys.platform == "win32":
|
||||||
"--pdfa",
|
# on Windows, there are no default paths to search for an ICC profile
|
||||||
nargs="?",
|
# so make the argument required instead of optional
|
||||||
const="/usr/share/color/icc/sRGB.icc",
|
outargs.add_argument(
|
||||||
default=None,
|
"--pdfa",
|
||||||
help="Output a PDF/A-1b compliant document. By default, this will "
|
type=validate_icc,
|
||||||
"embed /usr/share/color/icc/sRGB.icc as the color profile.",
|
help="Output a PDF/A-1b compliant document. The argument to this "
|
||||||
)
|
"option is the path to the ICC profile that will be embedded into "
|
||||||
|
"the resulting PDF.",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
outargs.add_argument(
|
||||||
|
"--pdfa",
|
||||||
|
nargs="?",
|
||||||
|
const=get_default_icc_profile(),
|
||||||
|
default=None,
|
||||||
|
type=validate_icc,
|
||||||
|
help="Output a PDF/A-1b compliant document. By default, this will "
|
||||||
|
"embed either /usr/share/color/icc/sRGB.icc, "
|
||||||
|
"/usr/share/color/icc/OpenICC/sRGB.icc or "
|
||||||
|
"/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
|
||||||
|
"is found to exist first.",
|
||||||
|
)
|
||||||
|
|
||||||
sizeargs = parser.add_argument_group(
|
sizeargs = parser.add_argument_group(
|
||||||
title="Image and page size and layout arguments",
|
title="Image and page size and layout arguments",
|
||||||
|
@ -3929,8 +4432,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
|
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
|
||||||
)
|
)
|
||||||
|
return parser
|
||||||
|
|
||||||
args = parser.parse_args(argv[1:])
|
|
||||||
|
def main(argv=sys.argv):
|
||||||
|
args = get_main_parser().parse_args(argv[1:])
|
||||||
|
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
@ -3954,7 +4460,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
||||||
elif len(args.images) == 0 and len(args.from_file) == 0:
|
elif len(args.images) == 0 and len(args.from_file) == 0:
|
||||||
# if no positional arguments were supplied, read a single image from
|
# if no positional arguments were supplied, read a single image from
|
||||||
# standard input
|
# standard input
|
||||||
logger.info("reading image from standard input")
|
print(
|
||||||
|
"Reading image from standard input...\n"
|
||||||
|
"Re-run with -h or --help for usage information.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
images = [sys.stdin.buffer.read()]
|
images = [sys.stdin.buffer.read()]
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
@ -4015,6 +4525,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
||||||
artborder=args.art_border,
|
artborder=args.art_border,
|
||||||
pdfa=args.pdfa,
|
pdfa=args.pdfa,
|
||||||
rotation=args.rotation,
|
rotation=args.rotation,
|
||||||
|
include_thumbnails=args.include_thumbnails,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("error: " + str(e))
|
logger.error("error: " + str(e))
|
||||||
|
|
File diff suppressed because it is too large
Load diff
55
src/jp2.py
55
src/jp2.py
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
||||||
|
|
||||||
|
|
||||||
def parse_ihdr(data):
    """Decode the start of a JP2 image header (ihdr) box.

    ``data`` is the box content; only its first 11 bytes are used.

    Returns (width, height, channels, bits per component).

    Raises struct.error if ``data`` is shorter than 11 bytes.
    """
    height, width, channels, bpc = struct.unpack(">IIHB", data[:11])
    # The low seven bits of the BPC byte hold the bit depth minus one and the
    # high bit flags signed samples, so the flag must be masked out before
    # computing the depth.  255 means that the depth varies per component;
    # that value is passed on as before because no single depth exists.
    if bpc != 0xFF:
        bpc &= 0x7F
    return width, height, channels, bpc + 1
|
|
||||||
|
|
||||||
|
|
||||||
def parse_colr(data):
|
def parse_colr(data):
|
||||||
|
@ -59,8 +58,8 @@ def parse_colr(data):
|
||||||
|
|
||||||
def parse_resc(data):
    """Decode a JP2 capture resolution (resc) box into dots per inch.

    ``data`` is the 10 byte box content.  It stores the vertical numerator
    and denominator first, then the horizontal numerator and denominator,
    then the vertical and the horizontal base-10 exponent.  Each resolution
    is ``num / den * 10**exp`` grid points per metre.

    Returns (hdpi, vdpi).

    Raises struct.error if ``data`` is not exactly 10 bytes long and
    ZeroDivisionError if a denominator is zero.
    """
    # vertical values come before horizontal ones; exponents are signed
    vnum, vden, hnum, hden, vexp, hexp = struct.unpack(">HHHHbb", data)
    # one inch is 0.0254 metres, thus dpi = points per metre * 254 / 10000
    hdpi = (254 * hnum * (10**hexp)) / (10000 * hden)
    vdpi = (254 * vnum * (10**vexp)) / (10000 * vden)
    return hdpi, vdpi
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"ihdr":
|
if boxType == b"ihdr":
|
||||||
width, height = parse_ihdr(boxContents)
|
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||||
elif boxType == b"colr":
|
elif boxType == b"colr":
|
||||||
colorspace = parse_colr(boxContents)
|
colorspace = parse_colr(boxContents)
|
||||||
elif boxType == b"res ":
|
elif boxType == b"res ":
|
||||||
hdpi, vdpi = parse_res(boxContents)
|
hdpi, vdpi = parse_res(boxContents)
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
return (width, height, colorspace, hdpi, vdpi)
|
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||||
|
|
||||||
|
|
||||||
def parsejp2(data):
|
def parsejp2(data):
|
||||||
|
@ -102,7 +101,9 @@ def parsejp2(data):
|
||||||
while byteStart < noBytes and boxLengthValue != 0:
|
while byteStart < noBytes and boxLengthValue != 0:
|
||||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||||
if boxType == b"jp2h":
|
if boxType == b"jp2h":
|
||||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||||
|
boxContents
|
||||||
|
)
|
||||||
break
|
break
|
||||||
byteStart = byteEnd
|
byteStart = byteEnd
|
||||||
if not width:
|
if not width:
|
||||||
|
@ -112,13 +113,41 @@ def parsejp2(data):
|
||||||
if not colorspace:
|
if not colorspace:
|
||||||
raise Exception("no colorspace in jp2 header")
|
raise Exception("no colorspace in jp2 header")
|
||||||
# retrieving the dpi is optional so we do not error out if not present
|
# retrieving the dpi is optional so we do not error out if not present
|
||||||
return (width, height, colorspace, hdpi, vdpi)
|
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||||
|
|
||||||
|
|
||||||
|
def parsej2k(data):
    """Read the image geometry from a raw JPEG 2000 codestream.

    ``data`` starts with the SOC and SIZ markers.  Only codestreams with
    exactly three components whose Ssiz value is 7 are supported.

    Returns (width, height, None, None, None, number of components, 8); the
    three None values stand for colorspace, hdpi and vdpi so that the tuple
    has the same layout as the one returned by parsejp2().

    Raises ValueError for any other component layout and struct.error if the
    SIZ segment is truncated.
    """
    _, _, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
        ">HHIIIIIIIIH", data[4:42]
    )
    # every component contributes three bytes (Ssiz, XRsiz, YRsiz); only the
    # Ssiz value of each component is of interest
    ssiz = list(data[42 : 42 + 3 * csiz : 3])
    # explicit check instead of assert: this validates file content and must
    # not disappear when Python runs with -O
    if ssiz != [7, 7, 7]:
        raise ValueError(
            "unsupported JPEG 2000 codestream: expected three components "
            "with Ssiz 7 but got %r" % (ssiz,)
        )
    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||||
|
|
||||||
|
|
||||||
|
def parse(data):
    """Parse JPEG 2000 data, whether a raw codestream or a JP2 container.

    Data starting with the SOC and SIZ markers (ff 4f ff 51) is handed to
    parsej2k(), everything else to parsejp2().  The result of the chosen
    parser is returned unchanged.
    """
    is_codestream = data[:4] == b"\xff\x4f\xff\x51"
    parser = parsej2k if is_codestream else parsejp2
    return parser(data)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import sys

    # Command line helper: print the header fields of the JPEG 2000 file
    # named by the first argument.  The file is read inside a with-block so
    # that its handle is closed instead of being left to the garbage
    # collector.
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace, hdpi, vdpi, channels, bpp = parse(f.read())
    print("width = %d" % width)
    print("height = %d" % height)
    print("colorspace = %s" % colorspace)
    print("hdpi = %s" % hdpi)
    print("vdpi = %s" % vdpi)
    print("channels = %s" % channels)
    print("bpp = %s" % bpp)
|
||||||
|
|
Loading…
Reference in a new issue