forked from josch/img2pdf
Compare commits
55 commits
alister-pa
...
main
Author | SHA1 | Date | |
---|---|---|---|
819b366bf5 | |||
cc8c708295 | |||
fb9537d8b7 | |||
7678435eb7 | |||
ba7a360866 | |||
7f0bf47ff3 | |||
|
5cd0918d50 | ||
|
f157ced05d | ||
09064e8e70 | |||
2f736d7891 | |||
e05580a49a | |||
acc25a4926 | |||
f597887088 | |||
3e832fbcc2 | |||
1e8557cef1 | |||
29921eeabd | |||
33139612f8 | |||
64d27f4a8b | |||
85cbe1d128 | |||
b25429a4c1 | |||
c703e9df06 | |||
79e9985f35 | |||
cb2644c34f | |||
81502f21af | |||
0cbcb8fa12 | |||
e9e04b6dd9 | |||
fc059ee471 | |||
25466113e9 | |||
7405635b72 | |||
aea472101b | |||
7fa67bb337 | |||
7d40569aa1 | |||
83f9c32328 | |||
be8369373f | |||
10c6901fa3 | |||
57d7e07e6b | |||
272fe0433f | |||
ef7b9e739d | |||
af6fe27d53 | |||
bad6fcae39 | |||
d9b90499f3 | |||
edb0d29a14 | |||
bb3e8b0098 | |||
f454ebc6a6 | |||
c3db273e23 | |||
87afabd3cf | |||
|
5045282cc2 | ||
fb4b96452a | |||
c553e169a4 | |||
d9345ac767 | |||
1d52530229 | |||
3b117e674b | |||
e8ca53738f | |||
7c48bfb868 | |||
244f034a2e |
9 changed files with 1321 additions and 291 deletions
3
.mailmap
Normal file
3
.mailmap
Normal file
|
@ -0,0 +1,3 @@
|
|||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <j.schauer@email.de>
|
||||
Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> <josch@pyneo.org>
|
23
CHANGES.rst
23
CHANGES.rst
|
@ -2,6 +2,29 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
0.5.1 (2023-11-26)
|
||||
------------------
|
||||
|
||||
- no default ICC profile location for PDF/A-1b on Windows
|
||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||
|
||||
0.5.0 (2023-10-28)
|
||||
------------------
|
||||
|
||||
- support MIFF for 16 bit CMYK input
|
||||
- accept pathlib.Path objects as input
|
||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||
be used if you want them
|
||||
- support for pikepdf (>= 6.2.0)
|
||||
|
||||
0.4.4 (2022-04-07)
|
||||
------------------
|
||||
|
||||
- --viewer-page-layout support for twopageright and twopageleft
|
||||
- Add B and JB paper sizes
|
||||
- support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0)
|
||||
|
||||
0.4.3 (2021-10-24)
|
||||
------------------
|
||||
|
||||
|
|
39
HACKING
39
HACKING
|
@ -27,6 +27,41 @@ Making a new release
|
|||
|
||||
- Build and upload to pypi:
|
||||
|
||||
$ rm dist/*
|
||||
$ rm -rf dist/*
|
||||
$ python3 setup.py sdist
|
||||
$ twine upload --sign dist/*
|
||||
$ twine upload dist/*
|
||||
|
||||
Using debbisect to find regressions
|
||||
-----------------------------------
|
||||
|
||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||
--verbose 2023-09-16 2023-10-24 \
|
||||
'chroot "$1" sh -c "
|
||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||
&& cd img2pdf
|
||||
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||
|
||||
Using debbisect cache
|
||||
---------------------
|
||||
|
||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--chrooted-customize-hook=bash
|
||||
unstable /dev/null
|
||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||
|
||||
Bisecting imagemagick
|
||||
---------------------
|
||||
|
||||
$ git clean -fdx && git reset --hard
|
||||
$ ./configure --prefix=$(pwd)/prefix
|
||||
$ make -j$(nproc)
|
||||
$ make install
|
||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||
|
|
50
README.md
50
README.md
|
@ -27,15 +27,15 @@ software, because the raw pixel data never has to be loaded into memory.
|
|||
The following table shows how img2pdf handles different input depending on the
|
||||
input file format and image color space.
|
||||
|
||||
| Format | Colorspace | Result |
|
||||
| -------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
| Format | Colorspace | Result |
|
||||
| ------------------------------------- | ------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced, no transparency) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
|
||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||
|
@ -72,11 +72,6 @@ Bugs
|
|||
when embedded into the PDF cannot be read by the Adobe Acrobat Reader,
|
||||
please contact me.
|
||||
|
||||
- I have not yet figured out how to determine the colorspace of JPEG2000
|
||||
files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000
|
||||
files with other colorspaces, you must explicitly specify it using the
|
||||
`--colorspace` option.
|
||||
|
||||
- An error is produced if the input image is broken. This commonly happens if
|
||||
the input image has an invalid EXIF Orientation value of zero. Even though
|
||||
only nine different values from 1 to 9 are permitted, Android phones and
|
||||
|
@ -122,10 +117,9 @@ You can then test the converter using:
|
|||
|
||||
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
|
||||
|
||||
For Microsoft Windows users, PyInstaller based .exe files are produced by
|
||||
appveyor. If you don't want to install Python before using img2pdf you can head
|
||||
to appveyor and click on "Artifacts" to download the latest version:
|
||||
https://ci.appveyor.com/project/josch/img2pdf
|
||||
If you don't want to set up Python on Windows, then head to the
|
||||
[releases](/josch/img2pdf/releases) section and download the latest
|
||||
`img2pdf.exe`.
|
||||
|
||||
GUI
|
||||
---
|
||||
|
@ -152,6 +146,10 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
|
||||
f1.write(img2pdf.convert(f2))
|
||||
|
||||
# opening using pathlib
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(pathlib.Path('test.jpg')))
|
||||
|
||||
# using in-memory image data
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert("\x89PNG..."))
|
||||
|
@ -194,6 +192,11 @@ The package can also be used as a library:
|
|||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
|
||||
|
||||
# convert all files matching a glob using pathlib.Path
|
||||
from pathlib import Path
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
|
||||
|
||||
# ignore invalid rotation values in the input images
|
||||
with open("name.pdf","wb") as f:
|
||||
f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid))
|
||||
|
@ -305,3 +308,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
|
|||
input to RGB and removes the alpha channel from images with transparency. For
|
||||
multipage TIFF or animated GIF, it will only convert the first frame.
|
||||
|
||||
Comparison to econvert from ExactImage
|
||||
--------------------------------------
|
||||
|
||||
Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
|
||||
directly without re-encoding but when given other file formats, it stores them
|
||||
just using flate compression, which unnecessarily increases the filesize.
|
||||
Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
|
||||
jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
|
||||
to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
|
||||
also seems to choose JPEG encoding when using it on some file types (like
|
||||
palette images) making it again not lossless for that input as well.
|
||||
|
|
|
@ -26,7 +26,8 @@ build: off
|
|||
|
||||
after_test:
|
||||
- "%PYTHON%\\python.exe setup.py bdist_wheel"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole src/img2pdf.py"
|
||||
- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py"
|
||||
#- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py"
|
||||
|
||||
artifacts:
|
||||
- path: dist\*
|
||||
|
|
2
setup.py
2
setup.py
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
from setuptools import setup
|
||||
|
||||
VERSION = "0.4.3"
|
||||
VERSION = "0.5.1"
|
||||
|
||||
INSTALL_REQUIRES = (
|
||||
"Pillow",
|
||||
|
|
683
src/img2pdf.py
683
src/img2pdf.py
|
@ -22,12 +22,22 @@ import sys
|
|||
import os
|
||||
import zlib
|
||||
import argparse
|
||||
from PIL import Image, TiffImagePlugin
|
||||
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
|
||||
|
||||
if hasattr(GifImagePlugin, "LoadingStrategy"):
|
||||
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
|
||||
# P to make sure that more than 256 colors can be represented. But palette
|
||||
# images compress far better than RGB images in PDF so we instruct Pillow
|
||||
# to only emit RGB frames if the palette differs and return P otherwise.
|
||||
# This works since Pillow 9.1.0.
|
||||
GifImagePlugin.LOADING_STRATEGY = (
|
||||
GifImagePlugin.LoadingStrategy.RGB_AFTER_DIFFERENT_PALETTE_ONLY
|
||||
)
|
||||
|
||||
# TiffImagePlugin.DEBUG = True
|
||||
from PIL.ExifTags import TAGS
|
||||
from datetime import datetime
|
||||
from jp2 import parsejp2
|
||||
from datetime import datetime, timezone
|
||||
import jp2
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
import logging
|
||||
|
@ -35,6 +45,8 @@ import struct
|
|||
import platform
|
||||
import hashlib
|
||||
from itertools import chain
|
||||
import re
|
||||
import io
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -50,7 +62,7 @@ try:
|
|||
except ImportError:
|
||||
have_pikepdf = False
|
||||
|
||||
__version__ = "0.4.3"
|
||||
__version__ = "0.5.1"
|
||||
default_dpi = 96.0
|
||||
papersizes = {
|
||||
"letter": "8.5inx11in",
|
||||
|
@ -61,6 +73,20 @@ papersizes = {
|
|||
"a4": "210mmx297mm",
|
||||
"a5": "148mmx210mm",
|
||||
"a6": "105mmx148mm",
|
||||
"b0": "1000mmx1414mm",
|
||||
"b1": "707mmx1000mm",
|
||||
"b2": "500mmx707mm",
|
||||
"b3": "353mmx500mm",
|
||||
"b4": "250mmx353mm",
|
||||
"b5": "176mmx250mm",
|
||||
"b6": "125mmx176mm",
|
||||
"jb0": "1030mmx1456mm",
|
||||
"jb1": "728mmx1030mm",
|
||||
"jb2": "515mmx728mm",
|
||||
"jb3": "364mmx515mm",
|
||||
"jb4": "257mmx364mm",
|
||||
"jb5": "182mmx257mm",
|
||||
"jb6": "128mmx182mm",
|
||||
"legal": "8.5inx14in",
|
||||
"tabloid": "11inx17in",
|
||||
}
|
||||
|
@ -73,6 +99,20 @@ papernames = {
|
|||
"a4": "A4",
|
||||
"a5": "A5",
|
||||
"a6": "A6",
|
||||
"b0": "B0",
|
||||
"b1": "B1",
|
||||
"b2": "B2",
|
||||
"b3": "B3",
|
||||
"b4": "B4",
|
||||
"b5": "B5",
|
||||
"b6": "B6",
|
||||
"jb0": "JB0",
|
||||
"jb1": "JB1",
|
||||
"jb2": "JB2",
|
||||
"jb3": "JB3",
|
||||
"jb4": "JB4",
|
||||
"jb5": "JB5",
|
||||
"jb6": "JB6",
|
||||
"legal": "Legal",
|
||||
"tabloid": "Tabloid",
|
||||
}
|
||||
|
@ -87,11 +127,16 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
|||
|
||||
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
||||
|
||||
ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
|
||||
ImageFormat = Enum(
|
||||
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
||||
)
|
||||
|
||||
PageMode = Enum("PageMode", "none outlines thumbs")
|
||||
|
||||
PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft")
|
||||
PageLayout = Enum(
|
||||
"PageLayout",
|
||||
"single onecolumn twocolumnright twocolumnleft twopageright twopageleft",
|
||||
)
|
||||
|
||||
Magnification = Enum("Magnification", "fit fith fitbh")
|
||||
|
||||
|
@ -389,6 +434,28 @@ class ExifOrientationError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
# temporarily change the attribute of an object using a context manager
|
||||
class temp_attr:
    """Context manager that overrides one attribute of an object for a while.

    On entry the attribute named ``field`` on ``obj`` is set to ``value``.
    On exit the previous value is put back, or, if the attribute did not
    exist before entry, it is deleted again.
    """

    def __init__(self, obj, field, value):
        self.obj, self.field, self.value = obj, field, value

    def __enter__(self):
        # remember whether there is something to restore later and, if so,
        # what it was
        try:
            self.old_value = getattr(self.obj, self.field)
        except AttributeError:
            self.exists = False
        else:
            self.exists = True
        logger.debug(f"setting {self.obj}.{self.field} = {self.value}")
        setattr(self.obj, self.field, self.value)

    def __exit__(self, exctype, excinst, exctb):
        # the attribute was introduced by us: remove it again
        if not self.exists:
            delattr(self.obj, self.field)
            return
        # otherwise put back what was there before
        setattr(self.obj, self.field, self.old_value)
|
||||
|
||||
|
||||
# without pdfrw this function is a no-op
|
||||
def my_convert_load(string):
    """Return ``string`` unchanged (identity placeholder)."""
    return string
|
||||
|
@ -655,7 +722,7 @@ class pdfdoc(object):
|
|||
self.writer.docinfo = PdfDict(indirect=True)
|
||||
|
||||
def datetime_to_pdfdate(dt):
|
||||
return dt.strftime("%Y%m%d%H%M%SZ")
|
||||
return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
|
||||
|
||||
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
|
||||
v = locals()[k.lower()]
|
||||
|
@ -665,7 +732,7 @@ class pdfdoc(object):
|
|||
v = PdfString.encode(v)
|
||||
self.writer.docinfo[getattr(PdfName, k)] = v
|
||||
|
||||
now = datetime.now()
|
||||
now = datetime.now().astimezone()
|
||||
for k in ["CreationDate", "ModDate"]:
|
||||
v = locals()[k.lower()]
|
||||
if v is None and nodate:
|
||||
|
@ -685,7 +752,7 @@ class pdfdoc(object):
|
|||
)
|
||||
|
||||
def datetime_to_xmpdate(dt):
|
||||
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
|
||||
|
@ -760,8 +827,10 @@ class pdfdoc(object):
|
|||
artborder=None,
|
||||
iccp=None,
|
||||
):
|
||||
assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
|
||||
imgformat == ImageFormat.PNG and smaskdata is not None
|
||||
assert (
|
||||
color not in [Colorspace.RGBA, Colorspace.LA]
|
||||
or (imgformat == ImageFormat.PNG and smaskdata is not None)
|
||||
or imgformat == ImageFormat.JPEG2000
|
||||
)
|
||||
|
||||
if self.engine == Engine.pikepdf:
|
||||
|
@ -785,7 +854,13 @@ class pdfdoc(object):
|
|||
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
||||
colorspace = PdfName.DeviceGray
|
||||
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||
colorspace = PdfName.DeviceRGB
|
||||
if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
|
||||
# there is no DeviceRGBA and for JPXDecode it is okay to have
|
||||
# no colorspace as the pdf reader is supposed to get this info
|
||||
# from the jpeg2000 payload itself
|
||||
colorspace = None
|
||||
else:
|
||||
colorspace = PdfName.DeviceRGB
|
||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||
colorspace = PdfName.DeviceCMYK
|
||||
elif color == Colorspace.P:
|
||||
|
@ -856,7 +931,8 @@ class pdfdoc(object):
|
|||
image[PdfName.Filter] = ofilter
|
||||
image[PdfName.Width] = imgwidthpx
|
||||
image[PdfName.Height] = imgheightpx
|
||||
image[PdfName.ColorSpace] = colorspace
|
||||
if colorspace is not None:
|
||||
image[PdfName.ColorSpace] = colorspace
|
||||
image[PdfName.BitsPerComponent] = depth
|
||||
|
||||
smask = None
|
||||
|
@ -1106,9 +1182,17 @@ class pdfdoc(object):
|
|||
[initial_page, PdfName.XYZ, NullObject, NullObject, 0]
|
||||
)
|
||||
|
||||
# the /OpenAction array must contain the page as an indirect object
|
||||
# The /OpenAction array must contain the page as an indirect object.
|
||||
# This changed some time after 4.2.0 and on or before 5.0.0 and current
|
||||
# versions require to use .obj or otherwise we get:
|
||||
# TypeError: Can't convert ObjectHelper (or subclass) to Object
|
||||
# implicitly. Use .obj to get access the underlying object.
|
||||
# See https://github.com/pikepdf/pikepdf/issues/313 for details.
|
||||
if self.engine == Engine.pikepdf:
|
||||
initial_page = self.writer.make_indirect(initial_page)
|
||||
if isinstance(initial_page, pikepdf.Page):
|
||||
initial_page = self.writer.make_indirect(initial_page.obj)
|
||||
else:
|
||||
initial_page = self.writer.make_indirect(initial_page)
|
||||
|
||||
if self.magnification == Magnification.fit:
|
||||
catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
|
||||
|
@ -1140,6 +1224,14 @@ class pdfdoc(object):
|
|||
catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
|
||||
elif self.page_layout == PageLayout.twocolumnleft:
|
||||
catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
|
||||
elif self.page_layout == PageLayout.twopageright:
|
||||
catalog[PdfName.PageLayout] = PdfName.TwoPageRight
|
||||
if self.output_version < "1.5":
|
||||
self.output_version = "1.5"
|
||||
elif self.page_layout == PageLayout.twopageleft:
|
||||
catalog[PdfName.PageLayout] = PdfName.TwoPageLeft
|
||||
if self.output_version < "1.5":
|
||||
self.output_version = "1.5"
|
||||
elif self.page_layout is None:
|
||||
pass
|
||||
else:
|
||||
|
@ -1177,8 +1269,11 @@ class pdfdoc(object):
|
|||
|
||||
# now write out the PDF
|
||||
if self.engine == Engine.pikepdf:
|
||||
kwargs = {}
|
||||
if pikepdf.__version__ >= "6.2.0":
|
||||
kwargs["deterministic_id"] = True
|
||||
self.writer.save(
|
||||
outputstream, min_version=self.output_version, linearize=True
|
||||
outputstream, min_version=self.output_version, linearize=True, **kwargs
|
||||
)
|
||||
elif self.engine == Engine.pdfrw:
|
||||
self.writer.trailer.Info = self.writer.docinfo
|
||||
|
@ -1206,7 +1301,7 @@ def get_imgmetadata(
|
|||
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
||||
# this codepath gets called if the PIL installation is not able to
|
||||
# handle JPEG2000 files
|
||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
|
||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
|
||||
|
||||
if hdpi is None:
|
||||
hdpi = default_dpi
|
||||
|
@ -1216,7 +1311,19 @@ def get_imgmetadata(
|
|||
else:
|
||||
imgwidthpx, imgheightpx = imgdata.size
|
||||
|
||||
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
|
||||
ndpi = imgdata.info.get("dpi")
|
||||
if ndpi is None:
|
||||
# the PNG plugin of PIL adds the undocumented "aspect" field instead of
|
||||
# the "dpi" field if the PNG pHYs chunk unit is not set to meters
|
||||
if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
|
||||
aspect = imgdata.info["aspect"]
|
||||
# make sure not to go below the default dpi
|
||||
if aspect[0] > aspect[1]:
|
||||
ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
|
||||
else:
|
||||
ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
|
||||
else:
|
||||
ndpi = (default_dpi, default_dpi)
|
||||
# In python3, the returned dpi value for some tiff images will
|
||||
# not be an integer but a float. To make the behaviour of
|
||||
# img2pdf the same between python2 and python3, we convert that
|
||||
|
@ -1226,7 +1333,7 @@ def get_imgmetadata(
|
|||
ics = imgdata.mode
|
||||
|
||||
# GIF and PNG files with transparency are supported
|
||||
if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
|
||||
if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
|
||||
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
||||
):
|
||||
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
||||
|
@ -1236,6 +1343,10 @@ def get_imgmetadata(
|
|||
if depth > 8:
|
||||
logger.warning("Image with transparency and a bit depth of %d." % depth)
|
||||
logger.warning("This is unsupported due to PIL limitations.")
|
||||
logger.warning(
|
||||
"If you accept a lossy conversion, you can manually convert "
|
||||
"your images to 8 bit using `convert -depth 8` from imagemagick"
|
||||
)
|
||||
raise AlphaChannelError(
|
||||
"Refusing to work with multiple >8bit channels."
|
||||
)
|
||||
|
@ -1346,6 +1457,53 @@ def get_imgmetadata(
|
|||
iccp = None
|
||||
if "icc_profile" in imgdata.info:
|
||||
iccp = imgdata.info.get("icc_profile")
|
||||
# GIMP saves bilevel TIFF images and palette PNG images with only black and
|
||||
# white in the palette with an RGB ICC profile which is useless
|
||||
# https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
|
||||
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||
# imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
|
||||
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
|
||||
if iccp is not None and (
|
||||
(color == Colorspace["1"] and imgformat == ImageFormat.TIFF)
|
||||
or (
|
||||
imgformat == ImageFormat.PNG
|
||||
and color == Colorspace.P
|
||||
and rawdata is not None
|
||||
and parse_png(rawdata)[1]
|
||||
in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"]
|
||||
)
|
||||
):
|
||||
with io.BytesIO(iccp) as f:
|
||||
prf = ImageCms.ImageCmsProfile(f)
|
||||
if (
|
||||
prf.profile.model == "sRGB"
|
||||
and prf.profile.manufacturer == "GIMP"
|
||||
and prf.profile.profile_description == "GIMP built-in sRGB"
|
||||
):
|
||||
if imgformat == ImageFormat.TIFF:
|
||||
logger.warning(
|
||||
"Ignoring RGB ICC profile in bilevel TIFF produced by GIMP."
|
||||
)
|
||||
elif imgformat == ImageFormat.PNG:
|
||||
logger.warning(
|
||||
"Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP."
|
||||
)
|
||||
logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
|
||||
iccp = None
|
||||
# SmartAlbums old version (found 2.2.6) exports JPG with only 1 component
|
||||
# with an RGB ICC profile which is useless.
|
||||
# This produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||
# Update: Found another case, the JPG is created by Adobe PhotoShop, so we
|
||||
# don't check software anymore.
|
||||
if iccp is not None and (
|
||||
(color == Colorspace["L"] and imgformat == ImageFormat.JPEG)
|
||||
):
|
||||
with io.BytesIO(iccp) as f:
|
||||
prf = ImageCms.ImageCmsProfile(f)
|
||||
|
||||
if prf.profile.xcolor_space not in ("GRAY"):
|
||||
logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
|
||||
iccp = None
|
||||
|
||||
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
|
||||
|
||||
|
@ -1402,27 +1560,29 @@ def transcode_monochrome(imgdata):
|
|||
# into putting everything into a single strip. Thanks to Andrew Murray for
|
||||
# the hack.
|
||||
#
|
||||
# This can be dropped once this gets merged:
|
||||
# https://github.com/python-pillow/Pillow/pull/5744
|
||||
pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__
|
||||
# Since version 8.4.0 Pillow allows us to modify the strip size explicitly
|
||||
tmp_strip_size = (imgdata.size[0] + 7) // 8 * imgdata.size[1]
|
||||
if hasattr(TiffImagePlugin, "STRIP_SIZE"):
|
||||
# we are using Pillow 8.4.0 or later
|
||||
with temp_attr(TiffImagePlugin, "STRIP_SIZE", tmp_strip_size):
|
||||
im.save(newimgio, format="TIFF", compression="group4")
|
||||
else:
|
||||
# only needed for Pillow 8.3.x but works for versions before that as
|
||||
# well
|
||||
pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__
|
||||
|
||||
def __getitem__(self, tag):
|
||||
overrides = {
|
||||
TiffImagePlugin.ROWSPERSTRIP: imgdata.size[1],
|
||||
TiffImagePlugin.STRIPBYTECOUNTS: [
|
||||
(imgdata.size[0] + 7) // 8 * imgdata.size[1]
|
||||
],
|
||||
TiffImagePlugin.STRIPOFFSETS: [0],
|
||||
}
|
||||
return overrides.get(tag, pillow__getitem__(self, tag))
|
||||
def __getitem__(self, tag):
|
||||
overrides = {
|
||||
TiffImagePlugin.ROWSPERSTRIP: imgdata.size[1],
|
||||
TiffImagePlugin.STRIPBYTECOUNTS: [tmp_strip_size],
|
||||
TiffImagePlugin.STRIPOFFSETS: [0],
|
||||
}
|
||||
return overrides.get(tag, pillow__getitem__(self, tag))
|
||||
|
||||
# use try/finally to make sure that __getitem__ is reset even if save()
|
||||
# raises an exception
|
||||
try:
|
||||
TiffImagePlugin.ImageFileDirectory_v2.__getitem__ = __getitem__
|
||||
im.save(newimgio, format="TIFF", compression="group4")
|
||||
finally:
|
||||
TiffImagePlugin.ImageFileDirectory_v2.__getitem__ = pillow__getitem__
|
||||
with temp_attr(
|
||||
TiffImagePlugin.ImageFileDirectory_v2, "__getitem__", __getitem__
|
||||
):
|
||||
im.save(newimgio, format="TIFF", compression="group4")
|
||||
|
||||
# Open new image in memory
|
||||
newimgio.seek(0)
|
||||
|
@ -1452,7 +1612,204 @@ def parse_png(rawdata):
|
|||
return pngidat, palette
|
||||
|
||||
|
||||
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||
miff_re = re.compile(
|
||||
r"""
|
||||
[^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
|
||||
[^=]+ # the field name can even contain spaces
|
||||
= # field name and value are separated by an equal sign
|
||||
(?:
|
||||
[^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
|
||||
|{[^}]*} # or any kind of char surrounded by braces
|
||||
)+""",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
# https://imagemagick.org/script/miff.php
|
||||
# turn off black formatting until python 3.10 is available on more platforms
|
||||
# and we can use match/case
|
||||
# fmt: off
|
||||
def parse_miff(data):
    """Parse ImageMagick MIFF data into a list of image description tuples.

    The input is split at the first ``b":\\x1a"`` into a textual header and
    the binary payload. The header is decoded as ISO-8859-1 and scanned with
    ``miff_re`` for ``key=value`` fields, of which class, colorspace, depth,
    colors, matte, columns, rows, profile and resolution are evaluated.

    For every image found, a 12-tuple is appended to the result:
    ``(colorspace, resolution, image format, image data, smask, columns,
    rows, palette, inverted, depth, rotation, icc profile)``. If more data
    follows the pixel data of an image, it must start with another MIFF
    header and is parsed recursively.

    Unsupported or unknown header values are reported via ``logger.warning``
    instead of being printed to stdout, consistent with the rest of the
    module.

    Raises ``ValueError`` if ``data`` does not contain the ``b":\\x1a"``
    separator and ``AssertionError`` if the header or the payload length do
    not match expectations.

    NOTE(review): the nesting below was reconstructed from the statement
    order; confirm against the repository before merging.
    """
    results = []
    header, rest = data.split(b":\x1a", 1)
    header = header.decode("ISO-8859-1")
    assert header.lower().startswith("id=imagemagick")
    hdata = {}
    for i, line in enumerate(re.findall(miff_re, header)):
        if not line:
            continue
        k, v = line.split("=", 1)
        if i == 0:
            # the very first field must be the magic id
            assert k.lower() == "id"
            assert v.lower() == "imagemagick"
        #match k.lower():
        #    case "class":
        if k.lower() == "class":
            #match v:
            #    case "DirectClass" | "PseudoClass":
            if v in ["DirectClass", "PseudoClass"]:
                hdata["class"] = v
            #    case _:
            else:
                logger.warning("cannot understand class %s", v)
        #    case "colorspace":
        elif k.lower() == "colorspace":
            # theoretically RGBA and CMYKA should be supported as well
            # please teach me how to create such a MIFF file
            #match v:
            #    case "sRGB" | "CMYK" | "Gray":
            if v in ["sRGB", "CMYK", "Gray"]:
                hdata["colorspace"] = v
            #    case _:
            else:
                logger.warning("cannot understand colorspace %s", v)
        #    case "depth":
        elif k.lower() == "depth":
            #match v:
            #    case "8" | "16" | "32":
            if v in ["8", "16", "32"]:
                hdata["depth"] = int(v)
            #    case _:
            else:
                logger.warning("cannot understand depth %s", v)
        #    case "colors":
        elif k.lower() == "colors":
            hdata["colors"] = int(v)
        #    case "matte":
        elif k.lower() == "matte":
            #match v:
            #    case "True":
            if v == "True":
                hdata["matte"] = True
            #    case "False":
            elif v == "False":
                hdata["matte"] = False
            #    case _:
            else:
                logger.warning("cannot understand matte %s", v)
        #    case "columns" | "rows":
        elif k.lower() in ["columns", "rows"]:
            hdata[k.lower()] = int(v)
        #    case "compression":
        elif k.lower() == "compression":
            logger.warning("compression not yet supported")
        #    case "profile":
        elif k.lower() == "profile":
            assert v in ["icc", "exif"]
            hdata["profile"] = v
        #    case "resolution":
        elif k.lower() == "resolution":
            dpix, dpiy = v.split("x", 1)
            hdata["resolution"] = (float(dpix), float(dpiy))

    assert "depth" in hdata
    assert "columns" in hdata
    assert "rows" in hdata
    #match hdata["class"]:
    #    case "DirectClass":
    if hdata["class"] == "DirectClass":
        if "colors" in hdata:
            assert hdata["colors"] == 0
        #match hdata["colorspace"]:
        #    case "sRGB":
        if hdata["colorspace"] == "sRGB":
            numchannels = 3
            colorspace = Colorspace.RGB
        #    case "CMYK":
        elif hdata["colorspace"] == "CMYK":
            numchannels = 4
            colorspace = Colorspace.CMYK
        #    case "Gray":
        elif hdata["colorspace"] == "Gray":
            numchannels = 1
            colorspace = Colorspace.L
        if hdata.get("matte"):
            # an alpha channel adds one more sample per pixel
            numchannels += 1
        if hdata.get("profile"):
            # there is no key encoding the length of icc or exif data
            # according to the docs, the profile-icc key is supposed to do this
            logger.warning("FAIL: exif")
        else:
            lenimgdata = (
                hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
            )
            assert len(rest) >= lenimgdata, (
                len(rest),
                hdata["depth"],
                numchannels,
                hdata["columns"],
                hdata["rows"],
                lenimgdata,
            )
            if colorspace == Colorspace.RGB and hdata["depth"] == 8:
                # 8 bit RGB is handed to PIL and stored as PNG data
                newimg = Image.frombytes("RGB", (hdata["columns"], hdata["rows"]), rest[:lenimgdata])
                imgdata, palette, depth = to_png_data(newimg)
                assert palette == b""
                assert depth == hdata["depth"]
                imgfmt = ImageFormat.PNG
            else:
                # everything else is stored as zlib-compressed raw samples
                imgdata = zlib.compress(rest[:lenimgdata])
                imgfmt = ImageFormat.MIFF
            results.append(
                (
                    colorspace,
                    hdata.get("resolution") or (default_dpi, default_dpi),
                    imgfmt,
                    imgdata,
                    None,  # smask
                    hdata["columns"],
                    hdata["rows"],
                    [],  # palette
                    False,  # inverted
                    hdata["depth"],
                    0,  # rotation
                    None,  # icc profile
                )
            )
            if len(rest) > lenimgdata:
                # another image is here
                assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
                results.extend(parse_miff(rest[lenimgdata:]))
    #    case "PseudoClass":
    elif hdata["class"] == "PseudoClass":
        assert "colors" in hdata
        if hdata.get("matte"):
            numchannels = 2
        else:
            numchannels = 1
        # the palette precedes the pixel data in the payload
        lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
        lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
        assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
        results.append(
            (
                Colorspace.RGB,
                hdata.get("resolution") or (default_dpi, default_dpi),
                ImageFormat.MIFF,
                zlib.compress(rest[lenpal : lenpal + lenimgdata]),
                None,  # FIXME: allow alpha channel smask
                hdata["columns"],
                hdata["rows"],
                rest[:lenpal],  # palette
                False,  # inverted
                hdata["depth"],
                0,  # rotation
                None,  # icc profile
            )
        )
        if len(rest) > lenpal + lenimgdata:
            # another image is here
            assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
                len(rest),
                lenpal,
                lenimgdata,
            )
            results.extend(parse_miff(rest[lenpal + lenimgdata :]))
    return results
|
||||
# fmt: on
|
||||
|
||||
|
||||
def read_images(
|
||||
rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
|
||||
):
|
||||
im = BytesIO(rawdata)
|
||||
im.seek(0)
|
||||
imgdata = None
|
||||
|
@ -1460,13 +1817,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
imgdata = Image.open(im)
|
||||
except IOError as e:
|
||||
# test if it is a jpeg2000 image
|
||||
if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
# image is jpeg2000
|
||||
imgformat = ImageFormat.JPEG2000
|
||||
if rawdata[:14].lower() == b"id=imagemagick":
|
||||
# image is in MIFF format
|
||||
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
||||
# we need PIL but PIL cannot do 16 bit
|
||||
imgformat = ImageFormat.MIFF
|
||||
else:
|
||||
raise ImageOpenError(
|
||||
"cannot read input image (not jpeg2000). "
|
||||
"PIL: error reading image: %s" % e
|
||||
)
|
||||
# image is jpeg2000
|
||||
imgformat = ImageFormat.JPEG2000
|
||||
else:
|
||||
logger.debug("PIL format = %s", imgdata.format)
|
||||
imgformat = None
|
||||
|
@ -1500,10 +1863,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||
if color == Colorspace["P"]:
|
||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||
if color == Colorspace["RGBA"]:
|
||||
if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
|
||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||
logger.debug("read_images() embeds a JPEG")
|
||||
cleanup()
|
||||
depth = 8
|
||||
if imgformat == ImageFormat.JPEG2000:
|
||||
*_, depth = jp2.parse(rawdata)
|
||||
return [
|
||||
(
|
||||
color,
|
||||
|
@ -1515,7 +1881,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
imgheightpx,
|
||||
[],
|
||||
False,
|
||||
8,
|
||||
depth,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
|
@ -1532,6 +1898,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
if imgformat == ImageFormat.MPO:
|
||||
result = []
|
||||
img_page_count = 0
|
||||
assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002])
|
||||
num_frames = len(imgdata.mpinfo[0xB002])
|
||||
# An MPO file can be a main image together with one or more thumbnails
|
||||
# if that is the case, then we only include all frames if the
|
||||
# --include-thumbnails option is given. If it is not, such an MPO file
|
||||
# will be embedded as is, so including its thumbnails but showing up
|
||||
# as a single image page in the resulting PDF.
|
||||
num_main_frames = 0
|
||||
num_thumbnail_frames = 0
|
||||
for i, mpent in enumerate(imgdata.mpinfo[0xB002]):
|
||||
# check only the first frame for being the main image
|
||||
if (
|
||||
i == 0
|
||||
and mpent["Attribute"]["DependentParentImageFlag"]
|
||||
and not mpent["Attribute"]["DependentChildImageFlag"]
|
||||
and mpent["Attribute"]["RepresentativeImageFlag"]
|
||||
and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image"
|
||||
):
|
||||
num_main_frames += 1
|
||||
elif (
|
||||
not mpent["Attribute"]["DependentParentImageFlag"]
|
||||
and mpent["Attribute"]["DependentChildImageFlag"]
|
||||
and not mpent["Attribute"]["RepresentativeImageFlag"]
|
||||
and mpent["Attribute"]["MPType"]
|
||||
in [
|
||||
"Large Thumbnail (VGA Equivalent)",
|
||||
"Large Thumbnail (Full HD Equivalent)",
|
||||
]
|
||||
):
|
||||
num_thumbnail_frames += 1
|
||||
logger.debug(f"number of frames: {num_frames}")
|
||||
logger.debug(f"number of main frames: {num_main_frames}")
|
||||
logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}")
|
||||
# this MPO file is a main image plus zero or more thumbnails
|
||||
# embed as-is unless the --include-thumbnails option was given
|
||||
if num_frames == 1 or (
|
||||
not include_thumbnails
|
||||
and num_main_frames == 1
|
||||
and num_thumbnail_frames + 1 == num_frames
|
||||
):
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
||||
)
|
||||
if color == Colorspace["1"]:
|
||||
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||
if color == Colorspace["P"]:
|
||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||
if color == Colorspace["RGBA"]:
|
||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||
logger.debug("read_images() embeds an MPO verbatim")
|
||||
cleanup()
|
||||
return [
|
||||
(
|
||||
color,
|
||||
ndpi,
|
||||
ImageFormat.JPEG,
|
||||
rawdata,
|
||||
None,
|
||||
imgwidthpx,
|
||||
imgheightpx,
|
||||
[],
|
||||
False,
|
||||
8,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
]
|
||||
# If the control flow reaches here, the MPO has more than a single
|
||||
# frame but was not detected to be a main image followed by multiple
|
||||
# thumbnails. We thus treat this MPO as we do other multi-frame images
|
||||
# and include all its frames as individual pages.
|
||||
for offset, mpent in zip(
|
||||
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
|
||||
):
|
||||
|
@ -1629,6 +2066,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
]
|
||||
|
||||
if imgformat == ImageFormat.MIFF:
|
||||
return parse_miff(rawdata)
|
||||
|
||||
# If our input is not JPEG or PNG, then we might have a format that
|
||||
# supports multiple frames (like TIFF or GIF), so we need a loop to
|
||||
# iterate through all frames of the image.
|
||||
|
@ -1794,7 +2234,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
)
|
||||
else:
|
||||
if (
|
||||
if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
||||
# PDF does not support palette images with icc profile
|
||||
if color == Colorspace.P:
|
||||
newcolor = Colorspace.RGB
|
||||
newimg = newimg.convert(mode="RGB")
|
||||
elif color == Colorspace.PA:
|
||||
newcolor = Colorspace.RGBA
|
||||
newimg = newimg.convert(mode="RGBA")
|
||||
smaskidat = None
|
||||
elif (
|
||||
color == Colorspace.RGBA
|
||||
or color == Colorspace.LA
|
||||
or color == Colorspace.PA
|
||||
|
@ -1808,25 +2257,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
newcolor = color
|
||||
l, a = newimg.split()
|
||||
newimg = l
|
||||
elif color == Colorspace.PA or (
|
||||
color == Colorspace.P and "transparency" in newimg.info
|
||||
):
|
||||
newcolor = color
|
||||
a = newimg.convert(mode="RGBA").split()[-1]
|
||||
else:
|
||||
newcolor = Colorspace.RGBA
|
||||
r, g, b, a = newimg.convert(mode="RGBA").split()
|
||||
newimg = Image.merge("RGB", (r, g, b))
|
||||
|
||||
smaskidat, _, _ = to_png_data(a)
|
||||
smaskidat, *_ = to_png_data(a)
|
||||
logger.warning(
|
||||
"Image contains an alpha channel which will be stored "
|
||||
"as a separate soft mask (/SMask) image in PDF."
|
||||
"Image contains an alpha channel. Computing a separate "
|
||||
"soft mask (/SMask) image to store transparency in PDF."
|
||||
)
|
||||
elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
||||
# PDF does not support palette images with icc profile
|
||||
if color == Colorspace.P:
|
||||
newcolor = Colorspace.RGB
|
||||
newimg = newimg.convert(mode="RGB")
|
||||
elif color == Colorspace.PA:
|
||||
newcolor = Colorspace.RGBA
|
||||
newimg = newimg.convert(mode="RGBA")
|
||||
smaskidat = None
|
||||
else:
|
||||
newcolor = color
|
||||
smaskidat = None
|
||||
|
@ -2147,7 +2592,11 @@ def get_fixed_dpi_layout_fun(fixed_dpi):
|
|||
|
||||
def find_scale(pagewidth, pageheight):
|
||||
"""Find the power of 10 (10, 100, 1000...) that will reduce the scale
|
||||
below the PDF specification limit of 14400 PDF units (=200 inches)"""
|
||||
below the PDF specification limit of 14400 PDF units (=200 inches).
|
||||
In principle we could also choose a scale that is not a power of 10.
|
||||
We use powers of 10 because numbers in the PDF format are represented
|
||||
in base-10 and using powers of 10 will thus just shift the comma and
|
||||
keep the numbers easily readable by humans as well."""
|
||||
from math import log10, ceil
|
||||
|
||||
major = max(pagewidth, pageheight)
|
||||
|
@ -2164,7 +2613,6 @@ def find_scale(pagewidth, pageheight):
|
|||
# as a binary string representing the image content or as filenames to the
|
||||
# images.
|
||||
def convert(*images, **kwargs):
|
||||
|
||||
_default_kwargs = dict(
|
||||
engine=None,
|
||||
title=None,
|
||||
|
@ -2194,6 +2642,7 @@ def convert(*images, **kwargs):
|
|||
artborder=None,
|
||||
pdfa=None,
|
||||
rotation=None,
|
||||
include_thumbnails=False,
|
||||
)
|
||||
for kwname, default in _default_kwargs.items():
|
||||
if kwname not in kwargs:
|
||||
|
@ -2237,11 +2686,16 @@ def convert(*images, **kwargs):
|
|||
for img in images:
|
||||
# img is allowed to be a path, a binary string representing image data
|
||||
# or a file-like object (really anything that implements read())
|
||||
try:
|
||||
rawdata = img.read()
|
||||
except AttributeError:
|
||||
# or a pathlib.Path object (really anything that implements read_bytes())
|
||||
rawdata = None
|
||||
for fun in "read", "read_bytes":
|
||||
try:
|
||||
rawdata = getattr(img, fun)()
|
||||
except AttributeError:
|
||||
pass
|
||||
if rawdata is None:
|
||||
if not isinstance(img, (str, bytes)):
|
||||
raise TypeError("Neither implements read() nor is str or bytes")
|
||||
raise TypeError("Neither read(), read_bytes() nor is str or bytes")
|
||||
# the thing doesn't have a read() function, so try if we can treat
|
||||
# it as a file name
|
||||
try:
|
||||
|
@ -2259,6 +2713,10 @@ def convert(*images, **kwargs):
|
|||
rawdata = f.read()
|
||||
f.close()
|
||||
|
||||
# md5 = hashlib.md5(rawdata).hexdigest()
|
||||
# with open("./testdata/" + md5, "wb") as f:
|
||||
# f.write(rawdata)
|
||||
|
||||
for (
|
||||
color,
|
||||
ndpi,
|
||||
|
@ -2277,6 +2735,7 @@ def convert(*images, **kwargs):
|
|||
kwargs["colorspace"],
|
||||
kwargs["first_frame_only"],
|
||||
kwargs["rotation"],
|
||||
kwargs["include_thumbnails"],
|
||||
):
|
||||
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
|
||||
imgwidthpx, imgheightpx, ndpi
|
||||
|
@ -2652,7 +3111,7 @@ def valid_date(string):
|
|||
else:
|
||||
try:
|
||||
return parser.parse(string)
|
||||
except TypeError:
|
||||
except:
|
||||
pass
|
||||
# as a last resort, try the local date utility
|
||||
try:
|
||||
|
@ -2665,7 +3124,7 @@ def valid_date(string):
|
|||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
else:
|
||||
return datetime.utcfromtimestamp(int(utime))
|
||||
return datetime.fromtimestamp(int(utime))
|
||||
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
|
||||
|
||||
|
||||
|
@ -3367,7 +3826,35 @@ def gui():
|
|||
app.mainloop()
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
def file_is_icc(fname):
    """Return True if the file at *fname* carries the ICC profile signature.

    Only the first 40 bytes are read. The file is considered an ICC profile
    when bytes 36..39 equal ``b"acsp"``. Files shorter than 40 bytes are
    rejected.

    Raises OSError if the file cannot be opened or read.
    """
    with open(fname, "rb") as f:
        data = f.read(40)
    if len(data) < 40:
        return False
    return data[36:] == b"acsp"


def validate_icc(fname):
    """argparse ``type=`` callable: accept *fname* only if it is an ICC profile.

    Returns *fname* unchanged on success.

    Raises argparse.ArgumentTypeError if the file is not an ICC profile or
    cannot be read at all. The OSError is translated because argparse only
    turns ArgumentTypeError, TypeError and ValueError into a usage message;
    any other exception would surface as a traceback.
    """
    try:
        is_icc = file_is_icc(fname)
    except OSError as e:
        raise argparse.ArgumentTypeError(
            'cannot read ICC profile "%s": %s' % (fname, e)
        )
    if not is_icc:
        raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
    return fname
|
||||
|
||||
|
||||
def get_default_icc_profile():
    """Pick the sRGB ICC profile that is used when --pdfa gets no argument.

    The candidate locations are tried in order and the first one that both
    exists and passes file_is_icc() is returned. If none qualifies, the
    first candidate path is returned anyway, whether or not it exists.
    """
    candidates = (
        "/usr/share/color/icc/sRGB.icc",
        "/usr/share/color/icc/OpenICC/sRGB.icc",
        "/usr/share/color/icc/colord/sRGB.icc",
    )
    for candidate in candidates:
        # existence is checked first so file_is_icc() never opens a
        # missing path
        if os.path.exists(candidate) and file_is_icc(candidate):
            return candidate
    return candidates[0]
|
||||
|
||||
|
||||
def get_main_parser():
|
||||
rendered_papersizes = ""
|
||||
for k, v in sorted(papersizes.items()):
|
||||
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
|
||||
|
@ -3408,7 +3895,9 @@ Paper sizes:
|
|||
the value in the second column has the same effect as giving the short hand
|
||||
in the first column. Appending ^T (a caret/circumflex followed by the letter
|
||||
T) turns the paper size from portrait into landscape. The postfix thus
|
||||
symbolizes the transpose. The values are case insensitive.
|
||||
symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
|
||||
the escape character, so you need to put quotes around the option value.
|
||||
The values are case insensitive.
|
||||
|
||||
%s
|
||||
|
||||
|
@ -3475,7 +3964,7 @@ Examples:
|
|||
while preserving its aspect ratio and a print border of 2 cm on the top and
|
||||
bottom and 2.5 cm on the left and right hand side.
|
||||
|
||||
$ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
|
||||
$ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
|
||||
|
||||
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
|
||||
original image size if the image is smaller than that.
|
||||
|
@ -3610,6 +4099,17 @@ RGB.""",
|
|||
"input image be converted into a page in the resulting PDF.",
|
||||
)
|
||||
|
||||
outargs.add_argument(
|
||||
"--include-thumbnails",
|
||||
action="store_true",
|
||||
help="Some multi-frame formats like MPO carry a main image and "
|
||||
"one or more scaled-down copies of the main image (thumbnails). "
|
||||
"In such a case, img2pdf will only include the main image and "
|
||||
"not create additional pages for each of the thumbnails. If this "
|
||||
"option is set, img2pdf will instead create one page per frame and "
|
||||
"thus store each thumbnail on its own page.",
|
||||
)
|
||||
|
||||
outargs.add_argument(
|
||||
"--pillow-limit-break",
|
||||
action="store_true",
|
||||
|
@ -3621,14 +4121,29 @@ RGB.""",
|
|||
% Image.MAX_IMAGE_PIXELS,
|
||||
)
|
||||
|
||||
outargs.add_argument(
|
||||
"--pdfa",
|
||||
nargs="?",
|
||||
const="/usr/share/color/icc/sRGB.icc",
|
||||
default=None,
|
||||
help="Output a PDF/A-1b compliant document. By default, this will "
|
||||
"embed /usr/share/color/icc/sRGB.icc as the color profile.",
|
||||
)
|
||||
if sys.platform == "win32":
|
||||
# on Windows, there are no default paths to search for an ICC profile
|
||||
# so make the argument required instead of optional
|
||||
outargs.add_argument(
|
||||
"--pdfa",
|
||||
type=validate_icc,
|
||||
help="Output a PDF/A-1b compliant document. The argument to this "
|
||||
"option is the path to the ICC profile that will be embedded into "
|
||||
"the resulting PDF.",
|
||||
)
|
||||
else:
|
||||
outargs.add_argument(
|
||||
"--pdfa",
|
||||
nargs="?",
|
||||
const=get_default_icc_profile(),
|
||||
default=None,
|
||||
type=validate_icc,
|
||||
help="Output a PDF/A-1b compliant document. By default, this will "
|
||||
"embed either /usr/share/color/icc/sRGB.icc, "
|
||||
"/usr/share/color/icc/OpenICC/sRGB.icc or "
|
||||
"/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
|
||||
"is found to exist first.",
|
||||
)
|
||||
|
||||
sizeargs = parser.add_argument_group(
|
||||
title="Image and page size and layout arguments",
|
||||
|
@ -3898,7 +4413,9 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
'Valid values are "single" (display single pages), "onecolumn" '
|
||||
'(one continuous column), "twocolumnright" (two continuous '
|
||||
'columns with odd number pages on the right) and "twocolumnleft" '
|
||||
"(two continuous columns with odd numbered pages on the left)",
|
||||
"(two continuous columns with odd numbered pages on the left), "
|
||||
'"twopageright" (two pages with odd numbered page on the right) '
|
||||
'and "twopageleft" (two pages with odd numbered page on the left)',
|
||||
)
|
||||
viewerargs.add_argument(
|
||||
"--viewer-fit-window",
|
||||
|
@ -3915,8 +4432,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
action="store_true",
|
||||
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
|
||||
)
|
||||
return parser
|
||||
|
||||
args = parser.parse_args(argv[1:])
|
||||
|
||||
def main(argv=sys.argv):
|
||||
args = get_main_parser().parse_args(argv[1:])
|
||||
|
||||
if args.verbose:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
@ -3940,7 +4460,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
elif len(args.images) == 0 and len(args.from_file) == 0:
|
||||
# if no positional arguments were supplied, read a single image from
|
||||
# standard input
|
||||
logger.info("reading image from standard input")
|
||||
print(
|
||||
"Reading image from standard input...\n"
|
||||
"Re-run with -h or --help for usage information.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
try:
|
||||
images = [sys.stdin.buffer.read()]
|
||||
except KeyboardInterrupt:
|
||||
|
@ -4001,6 +4525,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
artborder=args.art_border,
|
||||
pdfa=args.pdfa,
|
||||
rotation=args.rotation,
|
||||
include_thumbnails=args.include_thumbnails,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("error: " + str(e))
|
||||
|
|
File diff suppressed because it is too large
Load diff
55
src/jp2.py
55
src/jp2.py
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
|||
|
||||
|
||||
def parse_ihdr(data):
    """Decode the leading fields of a JP2 image header (ihdr) box.

    The first 11 bytes of *data* are read as big-endian height (uint32),
    width (uint32), channel count (uint16) and a one-byte bits-per-component
    field. That last field is returned incremented by one.

    Returns a tuple (width, height, channels, bpp).

    Raises struct.error if *data* holds fewer than 11 bytes.
    """
    # a single unpack replaces the former separate height/width reads, which
    # returned only two values while parse_jp2h() unpacks four
    height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
    return width, height, channels, bpp + 1
|
||||
|
||||
|
||||
def parse_colr(data):
|
||||
|
@ -59,8 +58,8 @@ def parse_colr(data):
|
|||
|
||||
def parse_resc(data):
    """Decode a JP2 capture resolution box into (hdpi, vdpi).

    *data* must be exactly 10 bytes: four big-endian uint16 values followed
    by two signed 8-bit exponents. Per ISO/IEC 15444-1 Annex I the fields
    are stored vertical first (VRcN, VRcD, HRcN, HRcD, VRcE, HRcE) and
    describe a resolution of ``num / den * 10**exp`` grid points per metre.

    Returns a tuple (hdpi, vdpi) in dots per inch.

    Raises struct.error if *data* is not 10 bytes long, and
    ZeroDivisionError if a denominator is zero.
    """
    # exponents are two's-complement ("b"), so negative powers of ten work
    vnum, vden, hnum, hden, vexp, hexp = struct.unpack(">HHHHbb", data)
    # points per metre -> points per inch (1 inch = 0.0254 m)
    hdpi = (hnum / hden) * (10**hexp) * 0.0254
    vdpi = (vnum / vden) * (10**vexp) * 0.0254
    return hdpi, vdpi
|
||||
|
||||
|
||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"ihdr":
|
||||
width, height = parse_ihdr(boxContents)
|
||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||
elif boxType == b"colr":
|
||||
colorspace = parse_colr(boxContents)
|
||||
elif boxType == b"res ":
|
||||
hdpi, vdpi = parse_res(boxContents)
|
||||
byteStart = byteEnd
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsejp2(data):
|
||||
|
@ -102,7 +101,9 @@ def parsejp2(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"jp2h":
|
||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||
boxContents
|
||||
)
|
||||
break
|
||||
byteStart = byteEnd
|
||||
if not width:
|
||||
|
@ -112,13 +113,41 @@ def parsejp2(data):
|
|||
if not colorspace:
|
||||
raise Exception("no colorspace in jp2 header")
|
||||
# retrieving the dpi is optional so we do not error out if not present
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsej2k(data):
    """Read image size and component info from a raw JPEG 2000 codestream.

    The fixed part of the SIZ marker segment is decoded from bytes 4..41 of
    *data*, followed by one three-byte (Ssiz, XRsiz, YRsiz) entry per
    component starting at byte 42. Only streams with exactly three
    components whose Ssiz byte is 7 are accepted, which is why the depth in
    the result is always 8.

    Returns a tuple (width, height, None, None, None, csiz, 8) so the shape
    matches what parsejp2() returns. The three None entries stand for
    colorspace, hdpi and vdpi, which are not read from a bare codestream.

    Raises struct.error if *data* is truncated, and ValueError for any
    component layout other than three components with Ssiz == 7.
    """
    _lsiz, _rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
        ">HHIIIIIIIIH", data[4:42]
    )
    # only the first byte (Ssiz) of each per-component triple is needed;
    # unpacking the whole triple keeps the struct.error on truncated input
    ssiz = [
        struct.unpack("BBB", data[42 + 3 * i : 42 + 3 * (i + 1)])[0]
        for i in range(csiz)
    ]
    # explicit check instead of assert: asserts vanish under "python -O" and
    # the hard-coded depth of 8 below would then be reported for any stream
    if ssiz != [7, 7, 7]:
        raise ValueError(
            "unsupported j2k codestream: expected three components with "
            "Ssiz == 7 but got %r" % (ssiz,)
        )
    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||
|
||||
|
||||
def parse(data):
    """Parse JPEG 2000 input, choosing the parser from the first four bytes.

    Data starting with ff 4f ff 51 is handed to parsej2k(); everything else
    is handed to parsejp2(). The chosen parser's result is returned as is.
    """
    is_raw_codestream = data[:4] == b"\xff\x4f\xff\x51"
    if not is_raw_codestream:
        return parsejp2(data)
    return parsej2k(data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Command line helper: print the values parse() extracts from the
    # JPEG 2000 file named by the first argument.
    # The file is read in binary mode because the parsers slice and compare
    # bytes; the context manager closes the handle again.
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace, hdpi, vdpi, channels, bpp = parse(f.read())
    print("width = %d" % width)
    print("height = %d" % height)
    print("colorspace = %s" % colorspace)
    print("hdpi = %s" % hdpi)
    print("vdpi = %s" % vdpi)
    print("channels = %s" % channels)
    print("bpp = %s" % bpp)
|
||||
|
|
Loading…
Reference in a new issue