forked from josch/img2pdf
Compare commits
45 commits
pdf-xmp-ti
...
main
Author | SHA1 | Date | |
---|---|---|---|
43c16ac369 | |||
08c4d9beec | |||
9e6eba9f40 | |||
5aeb628506 | |||
b6dbfdb481 | |||
23436114f8 | |||
2d5e4e3cb7 | |||
5e515abb6f | |||
a2e2998fb1 | |||
14948e7ba8 | |||
bcfdf8b54e | |||
9f74740c95 | |||
cbc3d50c63 | |||
4b549592bf | |||
5540365cfd | |||
819b366bf5 | |||
cc8c708295 | |||
fb9537d8b7 | |||
7678435eb7 | |||
ba7a360866 | |||
7f0bf47ff3 | |||
|
5cd0918d50 | ||
|
f157ced05d | ||
09064e8e70 | |||
2f736d7891 | |||
e05580a49a | |||
acc25a4926 | |||
f597887088 | |||
3e832fbcc2 | |||
1e8557cef1 | |||
29921eeabd | |||
33139612f8 | |||
64d27f4a8b | |||
85cbe1d128 | |||
b25429a4c1 | |||
c703e9df06 | |||
79e9985f35 | |||
cb2644c34f | |||
81502f21af | |||
0cbcb8fa12 | |||
e9e04b6dd9 | |||
fc059ee471 | |||
25466113e9 | |||
7405635b72 | |||
aea472101b |
9 changed files with 1019 additions and 384 deletions
23
CHANGES.rst
23
CHANGES.rst
|
@ -2,6 +2,29 @@
|
|||
CHANGES
|
||||
=======
|
||||
|
||||
0.6.0 (2025-02-15)
|
||||
------------------
|
||||
|
||||
- Add support for JBIG2 (generic coding)
|
||||
- Add convert_to_docobject() broken out from convert()
|
||||
- Add pil_get_dpi() broken out from get_imgmetadata()
|
||||
|
||||
0.5.1 (2023-11-26)
|
||||
------------------
|
||||
|
||||
- no default ICC profile location for PDF/A-1b on Windows
|
||||
- workaround for PNG input without dpi units but non-square dpi aspect ratio
|
||||
|
||||
0.5.0 (2023-10-28)
|
||||
------------------
|
||||
|
||||
- support MIFF for 16 bit CMYK input
|
||||
- accept pathlib.Path objects as input
|
||||
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
|
||||
- thumbnails are no longer included by default and --include-thumbnails has to
|
||||
be used if you want them
|
||||
- support for pikepdf (>= 6.2.0)
|
||||
|
||||
0.4.4 (2022-04-07)
|
||||
------------------
|
||||
|
||||
|
|
43
HACKING
43
HACKING
|
@ -27,6 +27,45 @@ Making a new release
|
|||
|
||||
- Build and upload to pypi:
|
||||
|
||||
$ rm dist/*
|
||||
$ rm -rf dist/*
|
||||
$ python3 setup.py sdist
|
||||
$ twine upload --sign dist/*
|
||||
$ twine upload dist/*
|
||||
|
||||
- Push everything to git forge
|
||||
|
||||
$ git push
|
||||
|
||||
Using debbisect to find regressions
|
||||
-----------------------------------
|
||||
|
||||
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
|
||||
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
|
||||
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
|
||||
--verbose 2023-09-16 2023-10-24 \
|
||||
'chroot "$1" sh -c "
|
||||
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git
|
||||
&& cd img2pdf
|
||||
&& pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"'
|
||||
|
||||
Using debbisect cache
|
||||
---------------------
|
||||
|
||||
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
|
||||
--include=git,ca-certificates,python3,ghostscript,imagemagick \
|
||||
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
|
||||
--include=python3-numpy,python3-scipy,python3-pikepdf \
|
||||
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
|
||||
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
|
||||
--chrooted-customize-hook=bash \
|
||||
unstable /dev/null \
|
||||
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
|
||||
|
||||
Bisecting imagemagick
|
||||
---------------------
|
||||
|
||||
$ git clean -fdx && git reset --hard
|
||||
$ ./configure --prefix=$(pwd)/prefix
|
||||
$ make -j$(nproc)
|
||||
$ make install
|
||||
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...
|
||||
|
|
22
README.md
22
README.md
|
@ -28,17 +28,19 @@ The following table shows how img2pdf handles different input depending on the
|
|||
input file format and image color space.
|
||||
|
||||
| Format | Colorspace | Result |
|
||||
| ------------------------------------- | ------------------------------ | ------------- |
|
||||
| ------------------------------------- | ------------------------------------ | ------------- |
|
||||
| JPEG | any | direct |
|
||||
| JPEG2000 | any | direct |
|
||||
| PNG (non-interlaced, no transparency) | any | direct |
|
||||
| TIFF (CCITT Group 4) | monochrome | direct |
|
||||
| any | any except CMYK and monochrome | PNG Paeth |
|
||||
| any | monochrome | CCITT Group 4 |
|
||||
| TIFF (CCITT Group 4) | 1-bit monochrome | direct |
|
||||
| JBIG2 (single-page generic coding) | 1-bit monochrome | direct |
|
||||
| any | any except CMYK and 1-bit monochrome | PNG Paeth |
|
||||
| any | 1-bit monochrome | CCITT Group 4 |
|
||||
| any | CMYK | flate |
|
||||
|
||||
For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
|
||||
encoded data, img2pdf directly embeds the image data into the PDF without
|
||||
For JPEG, JPEG2000, non-interlaced PNG, TIFF images with CCITT Group 4
|
||||
encoded data, and JBIG2 with single-page generic coding (e.g. using `jbig2enc`),
|
||||
img2pdf directly embeds the image data into the PDF without
|
||||
re-encoding it. It thus treats the PDF format merely as a container format for
|
||||
the image data. In these cases, img2pdf only increases the filesize by the size
|
||||
of the PDF container (typically around 500 to 700 bytes). Since data is only
|
||||
|
@ -47,7 +49,7 @@ solutions for these input formats.
|
|||
|
||||
For all other input types, img2pdf first has to transform the pixel data to
|
||||
make it compatible with PDF. In most cases, the PNG Paeth filter is applied to
|
||||
the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for
|
||||
the pixel data. For 1-bit monochrome input, CCITT Group 4 is used instead. Only for
|
||||
CMYK input no filter is applied before finally applying flate compression.
|
||||
|
||||
Usage
|
||||
|
@ -65,6 +67,12 @@ The detailed documentation can be accessed by running:
|
|||
|
||||
$ img2pdf --help
|
||||
|
||||
With no command line arguments supplied, img2pdf will read a single image from
|
||||
standard input and write the resulting PDF to standard output. Here is an
|
||||
example of how to scan directly to PDF using scanimage(1) from SANE:
|
||||
|
||||
$ scanimage --mode=Color --resolution=300 | pnmtojpeg -quality 90 | img2pdf > scan.pdf
|
||||
|
||||
Bugs
|
||||
----
|
||||
|
||||
|
|
2
setup.py
2
setup.py
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
from setuptools import setup
|
||||
|
||||
VERSION = "0.4.4"
|
||||
VERSION = "0.6.0"
|
||||
|
||||
INSTALL_REQUIRES = (
|
||||
"Pillow",
|
||||
|
|
549
src/img2pdf.py
549
src/img2pdf.py
|
@ -22,7 +22,7 @@ import sys
|
|||
import os
|
||||
import zlib
|
||||
import argparse
|
||||
from PIL import Image, TiffImagePlugin, GifImagePlugin
|
||||
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms, ExifTags
|
||||
|
||||
if hasattr(GifImagePlugin, "LoadingStrategy"):
|
||||
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
|
||||
|
@ -36,8 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
|
|||
|
||||
# TiffImagePlugin.DEBUG = True
|
||||
from PIL.ExifTags import TAGS
|
||||
from datetime import datetime
|
||||
from jp2 import parsejp2
|
||||
from datetime import datetime, timezone
|
||||
import jp2
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
import logging
|
||||
|
@ -46,6 +46,7 @@ import platform
|
|||
import hashlib
|
||||
from itertools import chain
|
||||
import re
|
||||
import io
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -61,7 +62,7 @@ try:
|
|||
except ImportError:
|
||||
have_pikepdf = False
|
||||
|
||||
__version__ = "0.4.4"
|
||||
__version__ = "0.6.0"
|
||||
default_dpi = 96.0
|
||||
papersizes = {
|
||||
"letter": "8.5inx11in",
|
||||
|
@ -127,7 +128,7 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
|
|||
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
|
||||
|
||||
ImageFormat = Enum(
|
||||
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
|
||||
"ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF JBIG2 other"
|
||||
)
|
||||
|
||||
PageMode = Enum("PageMode", "none outlines thumbs")
|
||||
|
@ -721,7 +722,7 @@ class pdfdoc(object):
|
|||
self.writer.docinfo = PdfDict(indirect=True)
|
||||
|
||||
def datetime_to_pdfdate(dt):
|
||||
return dt.strftime("%Y%m%d%H%M%SZ")
|
||||
return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
|
||||
|
||||
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
|
||||
v = locals()[k.lower()]
|
||||
|
@ -731,7 +732,7 @@ class pdfdoc(object):
|
|||
v = PdfString.encode(v)
|
||||
self.writer.docinfo[getattr(PdfName, k)] = v
|
||||
|
||||
now = datetime.now()
|
||||
now = datetime.now().astimezone()
|
||||
for k in ["CreationDate", "ModDate"]:
|
||||
v = locals()[k.lower()]
|
||||
if v is None and nodate:
|
||||
|
@ -751,7 +752,7 @@ class pdfdoc(object):
|
|||
)
|
||||
|
||||
def datetime_to_xmpdate(dt):
|
||||
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||||
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
|
||||
|
@ -826,8 +827,10 @@ class pdfdoc(object):
|
|||
artborder=None,
|
||||
iccp=None,
|
||||
):
|
||||
assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
|
||||
imgformat == ImageFormat.PNG and smaskdata is not None
|
||||
assert (
|
||||
color not in [Colorspace.RGBA, Colorspace.LA]
|
||||
or (imgformat == ImageFormat.PNG and smaskdata is not None)
|
||||
or imgformat == ImageFormat.JPEG2000
|
||||
)
|
||||
|
||||
if self.engine == Engine.pikepdf:
|
||||
|
@ -851,6 +854,12 @@ class pdfdoc(object):
|
|||
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
|
||||
colorspace = PdfName.DeviceGray
|
||||
elif color == Colorspace.RGB or color == Colorspace.RGBA:
|
||||
if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
|
||||
# there is no DeviceRGBA and for JPXDecode it is okay to have
|
||||
# no colorspace as the pdf reader is supposed to get this info
|
||||
# from the jpeg2000 payload itself
|
||||
colorspace = None
|
||||
else:
|
||||
colorspace = PdfName.DeviceRGB
|
||||
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
|
||||
colorspace = PdfName.DeviceCMYK
|
||||
|
@ -909,6 +918,11 @@ class pdfdoc(object):
|
|||
self.output_version = "1.5" # jpeg2000 needs pdf 1.5
|
||||
elif imgformat is ImageFormat.CCITTGroup4:
|
||||
ofilter = [PdfName.CCITTFaxDecode]
|
||||
elif imgformat is ImageFormat.JBIG2:
|
||||
ofilter = PdfName.JBIG2Decode
|
||||
# JBIG2Decode requires PDF 1.4
|
||||
if self.output_version < "1.4":
|
||||
self.output_version = "1.4"
|
||||
else:
|
||||
ofilter = PdfName.FlateDecode
|
||||
|
||||
|
@ -922,6 +936,7 @@ class pdfdoc(object):
|
|||
image[PdfName.Filter] = ofilter
|
||||
image[PdfName.Width] = imgwidthpx
|
||||
image[PdfName.Height] = imgheightpx
|
||||
if colorspace is not None:
|
||||
image[PdfName.ColorSpace] = colorspace
|
||||
image[PdfName.BitsPerComponent] = depth
|
||||
|
||||
|
@ -1065,7 +1080,7 @@ class pdfdoc(object):
|
|||
self.tostream(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
def tostream(self, outputstream):
|
||||
def finalize(self):
|
||||
if self.engine == Engine.pikepdf:
|
||||
PdfArray = pikepdf.Array
|
||||
PdfDict = pikepdf.Dictionary
|
||||
|
@ -1257,7 +1272,9 @@ class pdfdoc(object):
|
|||
self.writer.addobj(metadata)
|
||||
self.writer.addobj(iccstream)
|
||||
|
||||
# now write out the PDF
|
||||
def tostream(self, outputstream):
|
||||
# write out the PDF
|
||||
# this assumes that finalize() has been invoked beforehand by the caller
|
||||
if self.engine == Engine.pikepdf:
|
||||
kwargs = {}
|
||||
if pikepdf.__version__ >= "6.2.0":
|
||||
|
@ -1266,6 +1283,8 @@ class pdfdoc(object):
|
|||
outputstream, min_version=self.output_version, linearize=True, **kwargs
|
||||
)
|
||||
elif self.engine == Engine.pdfrw:
|
||||
from pdfrw import PdfName, PdfArray
|
||||
|
||||
self.writer.trailer.Info = self.writer.docinfo
|
||||
# setting the version attribute of the pdfrw PdfWriter object will
|
||||
# influence the behaviour of the write() function
|
||||
|
@ -1285,47 +1304,27 @@ class pdfdoc(object):
|
|||
raise ValueError("unknown engine: %s" % self.engine)
|
||||
|
||||
|
||||
def get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None
|
||||
):
|
||||
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
||||
# this codepath gets called if the PIL installation is not able to
|
||||
# handle JPEG2000 files
|
||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
|
||||
|
||||
if hdpi is None:
|
||||
hdpi = default_dpi
|
||||
if vdpi is None:
|
||||
vdpi = default_dpi
|
||||
ndpi = (hdpi, vdpi)
|
||||
def pil_get_dpi(imgdata, imgformat, default_dpi):
|
||||
ndpi = imgdata.info.get("dpi")
|
||||
if ndpi is None:
|
||||
# the PNG plugin of PIL adds the undocumented "aspect" field instead of
|
||||
# the "dpi" field if the PNG pHYs chunk unit is not set to meters
|
||||
if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
|
||||
aspect = imgdata.info["aspect"]
|
||||
# make sure not to go below the default dpi
|
||||
if aspect[0] > aspect[1]:
|
||||
ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
|
||||
else:
|
||||
imgwidthpx, imgheightpx = imgdata.size
|
||||
ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
|
||||
else:
|
||||
ndpi = (default_dpi, default_dpi)
|
||||
|
||||
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
|
||||
# In python3, the returned dpi value for some tiff images will
|
||||
# not be an integer but a float. To make the behaviour of
|
||||
# img2pdf the same between python2 and python3, we convert that
|
||||
# float into an integer by rounding.
|
||||
# Search online for the 72.009 dpi problem for more info.
|
||||
ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
|
||||
ics = imgdata.mode
|
||||
|
||||
# GIF and PNG files with transparency are supported
|
||||
if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
|
||||
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
|
||||
):
|
||||
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
||||
# convert 16-bit RGBA/LA images to 8-bit.
|
||||
if imgformat == ImageFormat.PNG and rawdata is not None:
|
||||
depth = rawdata[24]
|
||||
if depth > 8:
|
||||
logger.warning("Image with transparency and a bit depth of %d." % depth)
|
||||
logger.warning("This is unsupported due to PIL limitations.")
|
||||
raise AlphaChannelError(
|
||||
"Refusing to work with multiple >8bit channels."
|
||||
)
|
||||
elif ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
|
||||
raise AlphaChannelError("This function must not be called on images with alpha")
|
||||
|
||||
# Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
|
||||
# Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
|
||||
|
@ -1342,11 +1341,112 @@ def get_imgmetadata(
|
|||
imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi),
|
||||
)
|
||||
|
||||
return ndpi
|
||||
|
||||
|
||||
def get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None
|
||||
):
|
||||
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
|
||||
# this codepath gets called if the PIL installation is not able to
|
||||
# handle JPEG2000 files
|
||||
imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
|
||||
|
||||
if hdpi is None:
|
||||
hdpi = default_dpi
|
||||
if vdpi is None:
|
||||
vdpi = default_dpi
|
||||
ndpi = (hdpi, vdpi)
|
||||
elif imgformat == ImageFormat.JBIG2:
|
||||
imgwidthpx, imgheightpx, xres, yres = struct.unpack(">IIII", rawdata[24:40])
|
||||
INCH_PER_METER = 39.370079
|
||||
if xres == 0:
|
||||
hdpi = default_dpi
|
||||
elif xres < 1000:
|
||||
# If xres is very small, it's likely accidentally expressed in dpi instead
|
||||
# of dpm. See e.g. https://github.com/agl/jbig2enc/issues/86
|
||||
hdpi = xres
|
||||
else:
|
||||
hdpi = int(float(xres) / INCH_PER_METER)
|
||||
if yres == 0:
|
||||
vdpi = default_dpi
|
||||
elif yres < 1000:
|
||||
vdpi = yres
|
||||
else:
|
||||
vdpi = int(float(yres) / INCH_PER_METER)
|
||||
ndpi = (hdpi, vdpi)
|
||||
ics = "1"
|
||||
else:
|
||||
imgwidthpx, imgheightpx = imgdata.size
|
||||
ndpi = pil_get_dpi(imgdata, imgformat, default_dpi)
|
||||
ics = imgdata.mode
|
||||
|
||||
logger.debug("input dpi = %d x %d", *ndpi)
|
||||
|
||||
# GIF and PNG files with transparency are supported
|
||||
if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
|
||||
ics in ["RGBA", "LA"]
|
||||
or (imgdata is not None and "transparency" in imgdata.info)
|
||||
):
|
||||
# Must check the IHDR chunk for the bit depth, because PIL would lossily
|
||||
# convert 16-bit RGBA/LA images to 8-bit.
|
||||
if imgformat == ImageFormat.PNG and rawdata is not None:
|
||||
depth = rawdata[24]
|
||||
if depth > 8:
|
||||
logger.warning("Image with transparency and a bit depth of %d." % depth)
|
||||
logger.warning("This is unsupported due to PIL limitations.")
|
||||
logger.warning(
|
||||
"If you accept a lossy conversion, you can manually convert "
|
||||
"your images to 8 bit using `convert -depth 8` from imagemagick"
|
||||
)
|
||||
raise AlphaChannelError(
|
||||
"Refusing to work with multiple >8bit channels."
|
||||
)
|
||||
elif ics in ["LA", "PA", "RGBA"] or (
|
||||
imgdata is not None and "transparency" in imgdata.info
|
||||
):
|
||||
raise AlphaChannelError("This function must not be called on images with alpha")
|
||||
|
||||
rotation = 0
|
||||
if rotreq in (None, Rotation.auto, Rotation.ifvalid):
|
||||
if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
|
||||
if hasattr(imgdata, "getexif") and imgdata.getexif() is not None:
|
||||
exif_dict = imgdata.getexif()
|
||||
o_key = ExifTags.Base.Orientation.value # 274 rsp. 0x112
|
||||
if exif_dict and o_key in exif_dict:
|
||||
# Detailed information on EXIF rotation tags:
|
||||
# http://impulseadventure.com/photo/exif-orientation.html
|
||||
value = exif_dict[o_key]
|
||||
if value == 1:
|
||||
rotation = 0
|
||||
elif value == 6:
|
||||
rotation = 90
|
||||
elif value == 3:
|
||||
rotation = 180
|
||||
elif value == 8:
|
||||
rotation = 270
|
||||
elif value in (2, 4, 5, 7):
|
||||
if rotreq == Rotation.ifvalid:
|
||||
logger.warning(
|
||||
"Unsupported flipped rotation mode (%d): use "
|
||||
"--rotation=ifvalid or "
|
||||
"rotation=img2pdf.Rotation.ifvalid to ignore",
|
||||
value,
|
||||
)
|
||||
else:
|
||||
raise ExifOrientationError(
|
||||
"Unsupported flipped rotation mode (%d): use "
|
||||
"--rotation=ifvalid or "
|
||||
"rotation=img2pdf.Rotation.ifvalid to ignore" % value
|
||||
)
|
||||
else:
|
||||
if rotreq == Rotation.ifvalid:
|
||||
logger.warning("Invalid rotation (%d)", value)
|
||||
else:
|
||||
raise ExifOrientationError(
|
||||
"Invalid rotation (%d): use --rotation=ifvalid "
|
||||
"or rotation=img2pdf.Rotation.ifvalid to ignore" % value
|
||||
)
|
||||
elif hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
|
||||
for tag, value in imgdata._getexif().items():
|
||||
if TAGS.get(tag, tag) == "Orientation":
|
||||
# Detailed information on EXIF rotation tags:
|
||||
|
@ -1381,6 +1481,7 @@ def get_imgmetadata(
|
|||
"Invalid rotation (%d): use --rotation=ifvalid "
|
||||
"or rotation=img2pdf.Rotation.ifvalid to ignore" % value
|
||||
)
|
||||
|
||||
elif rotreq in (Rotation.none, Rotation["0"]):
|
||||
rotation = 0
|
||||
elif rotreq == Rotation["90"]:
|
||||
|
@ -1429,8 +1530,55 @@ def get_imgmetadata(
|
|||
logger.debug("input colorspace = %s", color.name)
|
||||
|
||||
iccp = None
|
||||
if "icc_profile" in imgdata.info:
|
||||
if imgdata is not None and "icc_profile" in imgdata.info:
|
||||
iccp = imgdata.info.get("icc_profile")
|
||||
# GIMP saves bilevel TIFF images and palette PNG images with only black and
|
||||
# white in the palette with an RGB ICC profile which is useless
|
||||
# https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
|
||||
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||
# imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
|
||||
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
|
||||
if iccp is not None and (
|
||||
(color == Colorspace["1"] and imgformat == ImageFormat.TIFF)
|
||||
or (
|
||||
imgformat == ImageFormat.PNG
|
||||
and color == Colorspace.P
|
||||
and rawdata is not None
|
||||
and parse_png(rawdata)[1]
|
||||
in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"]
|
||||
)
|
||||
):
|
||||
with io.BytesIO(iccp) as f:
|
||||
prf = ImageCms.ImageCmsProfile(f)
|
||||
if (
|
||||
prf.profile.model == "sRGB"
|
||||
and prf.profile.manufacturer == "GIMP"
|
||||
and prf.profile.profile_description == "GIMP built-in sRGB"
|
||||
):
|
||||
if imgformat == ImageFormat.TIFF:
|
||||
logger.warning(
|
||||
"Ignoring RGB ICC profile in bilevel TIFF produced by GIMP."
|
||||
)
|
||||
elif imgformat == ImageFormat.PNG:
|
||||
logger.warning(
|
||||
"Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP."
|
||||
)
|
||||
logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
|
||||
iccp = None
|
||||
# Old versions of SmartAlbums (found 2.2.6) export JPG with only 1 component
|
||||
# with an RGB ICC profile which is useless.
|
||||
# This produces an error in Adobe Acrobat, so we ignore it with a warning.
|
||||
# Update: Found another case where the JPG was created by Adobe Photoshop, so we
|
||||
# don't check the software anymore.
|
||||
if iccp is not None and (
|
||||
(color == Colorspace["L"] and imgformat == ImageFormat.JPEG)
|
||||
):
|
||||
with io.BytesIO(iccp) as f:
|
||||
prf = ImageCms.ImageCmsProfile(f)
|
||||
|
||||
if prf.profile.xcolor_space not in ("GRAY"):
|
||||
logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
|
||||
iccp = None
|
||||
|
||||
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
|
||||
|
||||
|
@ -1551,6 +1699,7 @@ miff_re = re.compile(
|
|||
re.VERBOSE,
|
||||
)
|
||||
|
||||
|
||||
# https://imagemagick.org/script/miff.php
|
||||
# turn off black formatting until python 3.10 is available on more platforms
|
||||
# and we can use match/case
|
||||
|
@ -1649,7 +1798,7 @@ def parse_miff(data):
|
|||
elif hdata["colorspace"] == "Gray":
|
||||
numchannels = 1
|
||||
colorspace = Colorspace.L
|
||||
if hdata["matte"]:
|
||||
if hdata.get("matte"):
|
||||
numchannels += 1
|
||||
if hdata.get("profile"):
|
||||
# there is no key encoding the length of icc or exif data
|
||||
|
@ -1699,7 +1848,7 @@ def parse_miff(data):
|
|||
# case "PseudoClass":
|
||||
elif hdata["class"] == "PseudoClass":
|
||||
assert "colors" in hdata
|
||||
if hdata["matte"]:
|
||||
if hdata.get("matte"):
|
||||
numchannels = 2
|
||||
else:
|
||||
numchannels = 1
|
||||
|
@ -1732,9 +1881,9 @@ def parse_miff(data):
|
|||
results.extend(parse_miff(rest[lenpal + lenimgdata :]))
|
||||
return results
|
||||
# fmt: on
|
||||
|
||||
|
||||
def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
||||
def read_images(
|
||||
rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
|
||||
):
|
||||
im = BytesIO(rawdata)
|
||||
im.seek(0)
|
||||
imgdata = None
|
||||
|
@ -1745,7 +1894,51 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
|
||||
# image is jpeg2000
|
||||
imgformat = ImageFormat.JPEG2000
|
||||
if rawdata[:14].lower() == b"id=imagemagick":
|
||||
elif rawdata[:8] == b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a":
|
||||
# For now we only support single-page generic coding of JBIG2, for example as generated by
|
||||
# https://github.com/agl/jbig2enc
|
||||
#
|
||||
# In fact, you can encode an example image like `src/tests/input/mono.png` with jbig2enc and pipe the result directly into img2pdf:
|
||||
# jbig2 src/tests/input/mono.png | img2pdf -o src/tests/output/mono.png.pdf
|
||||
#
|
||||
# For this we assume that the first 13 bytes are the JBIG2 file header describing a document with one page,
|
||||
# followed by a "page information" segment describing the dimensions of that page.
|
||||
#
|
||||
# The following annotated `hexdump -C 042.jb2` shows the first 40 bytes that we inspect directly.
|
||||
# The first 24 bytes (until "||") have to match exactly, while the following 16 bytes are read by get_imgmetadata.
|
||||
#
|
||||
# 97 4a 42 32 0d 0a 1a 0a 01 00 00 00 01 00 00 00
|
||||
# \_____________________/ | \_________/ \______
|
||||
# magic-bytes org/unk pages seg-num
|
||||
#
|
||||
# 00 30 00 01 00 00 00 13 || 00 00 00 73 00 00 00 30
|
||||
# _/ | | | \_________/ || \_________/ \_________/
|
||||
# type refs page seg-size || width-px height-px
|
||||
#
|
||||
# 00 00 00 48 00 00 00 48
|
||||
# \_________/ \_________/
|
||||
# xres yres
|
||||
#
|
||||
# For more information on the data format, see:
|
||||
# * https://github.com/agl/jbig2enc/blob/ea05019/fcd14492.pdf
|
||||
# For more information about the generic coding, see:
|
||||
# * https://github.com/agl/jbig2enc/blob/ea05019/src/jbig2enc.cc#L898
|
||||
imgformat = ImageFormat.JBIG2
|
||||
if (
|
||||
rawdata[:24]
|
||||
!= b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a\x01\x00\x00\x00\x01\x00\x00\x00\x00\x30\x00\x01\x00\x00\x00\x13"
|
||||
):
|
||||
raise ImageOpenError(
|
||||
"Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`)."
|
||||
)
|
||||
if (
|
||||
rawdata[-22:]
|
||||
!= b"\x00\x00\x00\x021\x00\x01\x00\x00\x00\x00\x00\x00\x00\x033\x00\x01\x00\x00\x00\x00"
|
||||
):
|
||||
raise ImageOpenError(
|
||||
"Unsupported JBIG2 format; we expect end-of-page and end-of-file segments at the end (e.g. from `jbig2enc`)."
|
||||
)
|
||||
elif rawdata[:14].lower() == b"id=imagemagick":
|
||||
# image is in MIFF format
|
||||
# this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
|
||||
# we need PIL but PIL cannot do 16 bit
|
||||
|
@ -1757,12 +1950,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
else:
|
||||
logger.debug("PIL format = %s", imgdata.format)
|
||||
imgformat = None
|
||||
for f in ImageFormat:
|
||||
if f.name == imgdata.format:
|
||||
imgformat = f
|
||||
if imgformat is None:
|
||||
imgformat = ImageFormat.other
|
||||
imgformat = getattr(ImageFormat, imgdata.format, ImageFormat.other)
|
||||
|
||||
def cleanup():
|
||||
if imgdata is not None:
|
||||
|
@ -1788,10 +1976,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||
if color == Colorspace["P"]:
|
||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||
if color == Colorspace["RGBA"]:
|
||||
if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
|
||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||
logger.debug("read_images() embeds a JPEG")
|
||||
cleanup()
|
||||
depth = 8
|
||||
if imgformat == ImageFormat.JPEG2000:
|
||||
*_, depth = jp2.parse(rawdata)
|
||||
return [
|
||||
(
|
||||
color,
|
||||
|
@ -1803,7 +1994,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
imgheightpx,
|
||||
[],
|
||||
False,
|
||||
8,
|
||||
depth,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
|
@ -1820,6 +2011,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
if imgformat == ImageFormat.MPO:
|
||||
result = []
|
||||
img_page_count = 0
|
||||
assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002])
|
||||
num_frames = len(imgdata.mpinfo[0xB002])
|
||||
# An MPO file can be a main image together with one or more thumbnails.
|
||||
# If that is the case, then we only include all frames if the
|
||||
# --include-thumbnails option is given. If it is not, such an MPO file
|
||||
# will be embedded as is, so including its thumbnails but showing up
|
||||
# as a single image page in the resulting PDF.
|
||||
num_main_frames = 0
|
||||
num_thumbnail_frames = 0
|
||||
for i, mpent in enumerate(imgdata.mpinfo[0xB002]):
|
||||
# check only the first frame for being the main image
|
||||
if (
|
||||
i == 0
|
||||
and mpent["Attribute"]["DependentParentImageFlag"]
|
||||
and not mpent["Attribute"]["DependentChildImageFlag"]
|
||||
and mpent["Attribute"]["RepresentativeImageFlag"]
|
||||
and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image"
|
||||
):
|
||||
num_main_frames += 1
|
||||
elif (
|
||||
not mpent["Attribute"]["DependentParentImageFlag"]
|
||||
and mpent["Attribute"]["DependentChildImageFlag"]
|
||||
and not mpent["Attribute"]["RepresentativeImageFlag"]
|
||||
and mpent["Attribute"]["MPType"]
|
||||
in [
|
||||
"Large Thumbnail (VGA Equivalent)",
|
||||
"Large Thumbnail (Full HD Equivalent)",
|
||||
]
|
||||
):
|
||||
num_thumbnail_frames += 1
|
||||
logger.debug(f"number of frames: {num_frames}")
|
||||
logger.debug(f"number of main frames: {num_main_frames}")
|
||||
logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}")
|
||||
# this MPO file is a main image plus zero or more thumbnails
|
||||
# embed as-is unless the --include-thumbnails option was given
|
||||
if num_frames == 1 or (
|
||||
not include_thumbnails
|
||||
and num_main_frames == 1
|
||||
and num_thumbnail_frames + 1 == num_frames
|
||||
):
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
||||
)
|
||||
if color == Colorspace["1"]:
|
||||
raise JpegColorspaceError("jpeg can't be monochrome")
|
||||
if color == Colorspace["P"]:
|
||||
raise JpegColorspaceError("jpeg can't have a color palette")
|
||||
if color == Colorspace["RGBA"]:
|
||||
raise JpegColorspaceError("jpeg can't have an alpha channel")
|
||||
logger.debug("read_images() embeds an MPO verbatim")
|
||||
cleanup()
|
||||
return [
|
||||
(
|
||||
color,
|
||||
ndpi,
|
||||
ImageFormat.JPEG,
|
||||
rawdata,
|
||||
None,
|
||||
imgwidthpx,
|
||||
imgheightpx,
|
||||
[],
|
||||
False,
|
||||
8,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
]
|
||||
# If the control flow reaches here, the MPO has more than a single
|
||||
# frame but was not detected to be a main image followed by multiple
|
||||
# thumbnails. We thus treat this MPO as we do other multi-frame images
|
||||
# and include all its frames as individual pages.
|
||||
for offset, mpent in zip(
|
||||
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
|
||||
):
|
||||
|
@ -1917,6 +2179,28 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
]
|
||||
|
||||
if imgformat == ImageFormat.JBIG2:
|
||||
color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
|
||||
imgdata, imgformat, default_dpi, colorspace, rawdata, rot
|
||||
)
|
||||
streamdata = rawdata[13:-22] # Strip file header and footer
|
||||
return [
|
||||
(
|
||||
color,
|
||||
ndpi,
|
||||
imgformat,
|
||||
streamdata,
|
||||
None,
|
||||
imgwidthpx,
|
||||
imgheightpx,
|
||||
[],
|
||||
False,
|
||||
1,
|
||||
rotation,
|
||||
iccp,
|
||||
)
|
||||
]
|
||||
|
||||
if imgformat == ImageFormat.MIFF:
|
||||
return parse_miff(rawdata)
|
||||
|
||||
|
@ -2085,7 +2369,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
)
|
||||
)
|
||||
else:
|
||||
if (
|
||||
if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
||||
# PDF does not support palette images with icc profile
|
||||
if color == Colorspace.P:
|
||||
newcolor = Colorspace.RGB
|
||||
newimg = newimg.convert(mode="RGB")
|
||||
elif color == Colorspace.PA:
|
||||
newcolor = Colorspace.RGBA
|
||||
newimg = newimg.convert(mode="RGBA")
|
||||
smaskidat = None
|
||||
elif (
|
||||
color == Colorspace.RGBA
|
||||
or color == Colorspace.LA
|
||||
or color == Colorspace.PA
|
||||
|
@ -2099,25 +2392,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
|
|||
newcolor = color
|
||||
l, a = newimg.split()
|
||||
newimg = l
|
||||
elif color == Colorspace.PA or (
|
||||
color == Colorspace.P and "transparency" in newimg.info
|
||||
):
|
||||
newcolor = color
|
||||
a = newimg.convert(mode="RGBA").split()[-1]
|
||||
else:
|
||||
newcolor = Colorspace.RGBA
|
||||
r, g, b, a = newimg.convert(mode="RGBA").split()
|
||||
newimg = Image.merge("RGB", (r, g, b))
|
||||
|
||||
smaskidat, _, _ = to_png_data(a)
|
||||
smaskidat, *_ = to_png_data(a)
|
||||
logger.warning(
|
||||
"Image contains an alpha channel. Computing a separate "
|
||||
"soft mask (/SMask) image to store transparency in PDF."
|
||||
)
|
||||
elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
|
||||
# PDF does not support palette images with icc profile
|
||||
if color == Colorspace.P:
|
||||
newcolor = Colorspace.RGB
|
||||
newimg = newimg.convert(mode="RGB")
|
||||
elif color == Colorspace.PA:
|
||||
newcolor = Colorspace.RGBA
|
||||
newimg = newimg.convert(mode="RGBA")
|
||||
smaskidat = None
|
||||
else:
|
||||
newcolor = color
|
||||
smaskidat = None
|
||||
|
@ -2451,14 +2740,11 @@ def find_scale(pagewidth, pageheight):
|
|||
return 10 ** ceil(log10(oversized))
|
||||
|
||||
|
||||
# given one or more input image, depending on outputstream, either return a
|
||||
# string containing the whole PDF if outputstream is None or write the PDF
|
||||
# data to the given file-like object and return None
|
||||
#
|
||||
# Input images can be given as file like objects (they must implement read()),
|
||||
# as a binary string representing the image content or as filenames to the
|
||||
# images.
|
||||
def convert(*images, **kwargs):
|
||||
# Convert the image(s) to a `pdfdoc` object.
|
||||
# The `.writer` attribute holds the underlying engine document handle, and
|
||||
# `.output_version` the minimum version the caller should use when saving.
|
||||
# The main convert() wraps this implementation function.
|
||||
def convert_to_docobject(*images, **kwargs):
|
||||
_default_kwargs = dict(
|
||||
engine=None,
|
||||
title=None,
|
||||
|
@ -2479,7 +2765,6 @@ def convert(*images, **kwargs):
|
|||
viewer_fit_window=False,
|
||||
viewer_center_window=False,
|
||||
viewer_fullscreen=False,
|
||||
outputstream=None,
|
||||
first_frame_only=False,
|
||||
allow_oversized=True,
|
||||
cropborder=None,
|
||||
|
@ -2488,6 +2773,7 @@ def convert(*images, **kwargs):
|
|||
artborder=None,
|
||||
pdfa=None,
|
||||
rotation=None,
|
||||
include_thumbnails=False,
|
||||
)
|
||||
for kwname, default in _default_kwargs.items():
|
||||
if kwname not in kwargs:
|
||||
|
@ -2580,6 +2866,7 @@ def convert(*images, **kwargs):
|
|||
kwargs["colorspace"],
|
||||
kwargs["first_frame_only"],
|
||||
kwargs["rotation"],
|
||||
kwargs["include_thumbnails"],
|
||||
):
|
||||
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
|
||||
imgwidthpx, imgheightpx, ndpi
|
||||
|
@ -2640,10 +2927,22 @@ def convert(*images, **kwargs):
|
|||
iccp,
|
||||
)
|
||||
|
||||
if kwargs["outputstream"]:
|
||||
pdf.tostream(kwargs["outputstream"])
|
||||
return
|
||||
pdf.finalize()
|
||||
return pdf
|
||||
|
||||
|
||||
# Convert one or more input images into a single PDF.
#
# When outputstream is given (truthy), the PDF is written to that file-like
# object and None is returned. Otherwise the complete PDF is returned as
# produced by the document object's tostring() method.
#
# Input images can be given as file-like objects (they must implement
# read()), as a binary string representing the image content or as filenames
# to the images. All other keyword arguments are passed on unchanged to
# convert_to_docobject(), which does the actual conversion.
def convert(*images, outputstream=None, **kwargs):
    document = convert_to_docobject(*images, **kwargs)
    if not outputstream:
        return document.tostring()
    document.tostream(outputstream)
    return None
|
||||
|
||||
|
||||
|
@ -2955,7 +3254,7 @@ def valid_date(string):
|
|||
else:
|
||||
try:
|
||||
return parser.parse(string)
|
||||
except TypeError:
|
||||
except:
|
||||
pass
|
||||
# as a last resort, try the local date utility
|
||||
try:
|
||||
|
@ -2968,7 +3267,7 @@ def valid_date(string):
|
|||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
else:
|
||||
return datetime.utcfromtimestamp(int(utime))
|
||||
return datetime.fromtimestamp(int(utime))
|
||||
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
|
||||
|
||||
|
||||
|
@ -3670,7 +3969,35 @@ def gui():
|
|||
app.mainloop()
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
def file_is_icc(fname):
    """Return True if the file at *fname* looks like an ICC profile.

    The first 40 bytes are read and bytes 36..39 are compared against the
    signature b"acsp". Files shorter than 40 bytes are reported as False.

    Paths that cannot be opened or read (missing file, directory, permission
    problem) are also reported as False instead of raising, so that this
    predicate is safe to call on arbitrary user-supplied or probed paths.
    """
    try:
        with open(fname, "rb") as f:
            header = f.read(40)
    except OSError:
        # an unreadable path cannot be used as a profile either way
        return False
    # a header shorter than 40 bytes yields a slice shorter than the
    # signature, so no separate length check is needed
    return header[36:40] == b"acsp"
|
||||
|
||||
|
||||
def validate_icc(fname):
    """Check that *fname* is an ICC profile and return it unchanged.

    Intended for use as an argparse ``type=`` callback.

    Raises argparse.ArgumentTypeError if the file cannot be read or if it
    does not carry the ICC profile signature.
    """
    try:
        is_icc = file_is_icc(fname)
    except OSError as e:
        # Report I/O problems (missing file, directory, permissions) as a
        # regular argument error instead of letting them escape as a
        # traceback from within argument parsing.
        raise argparse.ArgumentTypeError(
            'cannot read "%s": %s' % (fname, e)
        ) from e
    if not is_icc:
        raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
    return fname
|
||||
|
||||
|
||||
def get_default_icc_profile():
    """Return the path of the default sRGB ICC profile.

    The known candidate locations are probed in order and the first one that
    exists and passes file_is_icc() is returned. If none qualifies, the first
    candidate path is returned anyway so that callers always get a path.

    Candidates that exist but cannot be read are skipped rather than raising:
    the result of this function is used as a default while the command line
    parser is being built, so an error here would break every invocation.
    """
    candidates = (
        "/usr/share/color/icc/sRGB.icc",
        "/usr/share/color/icc/OpenICC/sRGB.icc",
        "/usr/share/color/icc/colord/sRGB.icc",
    )
    for profile in candidates:
        if not os.path.exists(profile):
            continue
        try:
            if file_is_icc(profile):
                return profile
        except OSError:
            # e.g. a directory or a file without read permission
            continue
    return "/usr/share/color/icc/sRGB.icc"
|
||||
|
||||
|
||||
def get_main_parser():
|
||||
rendered_papersizes = ""
|
||||
for k, v in sorted(papersizes.items()):
|
||||
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
|
||||
|
@ -3711,7 +4038,9 @@ Paper sizes:
|
|||
the value in the second column has the same effect as giving the short hand
|
||||
in the first column. Appending ^T (a caret/circumflex followed by the letter
|
||||
T) turns the paper size from portrait into landscape. The postfix thus
|
||||
symbolizes the transpose. The values are case insensitive.
|
||||
symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
|
||||
the escape character, so you need to put quotes around the option value.
|
||||
The values are case insensitive.
|
||||
|
||||
%s
|
||||
|
||||
|
@ -3773,12 +4102,16 @@ Examples:
|
|||
|
||||
$ img2pdf --output out.pdf page1.jpg page2.jpg
|
||||
|
||||
Use a custom dpi value for the input images:
|
||||
|
||||
$ img2pdf --output out.pdf --imgsize 300dpi page1.jpg page2.jpg
|
||||
|
||||
Convert a directory of JPEG images into a PDF with printable A4 pages in
|
||||
landscape mode. On each page, the photo takes the maximum amount of space
|
||||
while preserving its aspect ratio and a print border of 2 cm on the top and
|
||||
bottom and 2.5 cm on the left and right hand side.
|
||||
|
||||
$ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
|
||||
$ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
|
||||
|
||||
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
|
||||
original image size if the image is smaller than that.
|
||||
|
@ -3913,6 +4246,17 @@ RGB.""",
|
|||
"input image be converted into a page in the resulting PDF.",
|
||||
)
|
||||
|
||||
outargs.add_argument(
|
||||
"--include-thumbnails",
|
||||
action="store_true",
|
||||
help="Some multi-frame formats like MPO carry a main image and "
|
||||
"one or more scaled-down copies of the main image (thumbnails). "
|
||||
"In such a case, img2pdf will only include the main image and "
|
||||
"not create additional pages for each of the thumbnails. If this "
|
||||
"option is set, img2pdf will instead create one page per frame and "
|
||||
"thus store each thumbnail on its own page.",
|
||||
)
|
||||
|
||||
outargs.add_argument(
|
||||
"--pillow-limit-break",
|
||||
action="store_true",
|
||||
|
@ -3924,13 +4268,28 @@ RGB.""",
|
|||
% Image.MAX_IMAGE_PIXELS,
|
||||
)
|
||||
|
||||
if sys.platform == "win32":
|
||||
# on Windows, there are no default paths to search for an ICC profile
|
||||
# so make the argument required instead of optional
|
||||
outargs.add_argument(
|
||||
"--pdfa",
|
||||
type=validate_icc,
|
||||
help="Output a PDF/A-1b compliant document. The argument to this "
|
||||
"option is the path to the ICC profile that will be embedded into "
|
||||
"the resulting PDF.",
|
||||
)
|
||||
else:
|
||||
outargs.add_argument(
|
||||
"--pdfa",
|
||||
nargs="?",
|
||||
const="/usr/share/color/icc/sRGB.icc",
|
||||
const=get_default_icc_profile(),
|
||||
default=None,
|
||||
type=validate_icc,
|
||||
help="Output a PDF/A-1b compliant document. By default, this will "
|
||||
"embed /usr/share/color/icc/sRGB.icc as the color profile.",
|
||||
"embed either /usr/share/color/icc/sRGB.icc, "
|
||||
"/usr/share/color/icc/OpenICC/sRGB.icc or "
|
||||
"/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
|
||||
"is found to exist first.",
|
||||
)
|
||||
|
||||
sizeargs = parser.add_argument_group(
|
||||
|
@ -4220,8 +4579,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
action="store_true",
|
||||
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
|
||||
)
|
||||
return parser
|
||||
|
||||
args = parser.parse_args(argv[1:])
|
||||
|
||||
def main(argv=sys.argv):
|
||||
args = get_main_parser().parse_args(argv[1:])
|
||||
|
||||
if args.verbose:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
@ -4248,7 +4610,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
print(
|
||||
"Reading image from standard input...\n"
|
||||
"Re-run with -h or --help for usage information.",
|
||||
file=sys.stderr
|
||||
file=sys.stderr,
|
||||
)
|
||||
try:
|
||||
images = [sys.stdin.buffer.read()]
|
||||
|
@ -4310,6 +4672,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
|
|||
artborder=args.art_border,
|
||||
pdfa=args.pdfa,
|
||||
rotation=args.rotation,
|
||||
include_thumbnails=args.include_thumbnails,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("error: " + str(e))
|
||||
|
|
File diff suppressed because it is too large
Load diff
55
src/jp2.py
55
src/jp2.py
|
@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
|
|||
|
||||
|
||||
def parse_ihdr(data):
    """Parse the contents of an ``ihdr`` box.

    The first 11 bytes of *data* are decoded as big-endian height (32 bit),
    width (32 bit), number of channels (16 bit) and a one-byte depth field.

    Returns the tuple ``(width, height, channels, bpp)`` where ``bpp`` is the
    stored depth byte plus one.

    Raises struct.error if *data* is shorter than 11 bytes.
    """
    # NOTE(review): presumably the depth byte stores "bits per component
    # minus one"; a possible sign flag or "varying depth" marker in that byte
    # is not handled here -- confirm against the JP2 specification.
    height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
    return width, height, channels, bpp + 1
|
||||
|
||||
|
||||
def parse_colr(data):
|
||||
|
@ -59,8 +58,8 @@ def parse_colr(data):
|
|||
|
||||
def parse_resc(data):
    """Parse the contents of a resolution box into a ``(hdpi, vdpi)`` tuple.

    *data* must be exactly 10 bytes: four big-endian 16-bit values followed by
    two unsigned bytes, read as horizontal numerator, horizontal denominator,
    vertical numerator, vertical denominator, horizontal exponent and
    vertical exponent. Each result is ``num / den * 10**exp * 100 / 2.54``.

    Raises struct.error if *data* does not have the expected length and
    ZeroDivisionError if a denominator is zero.
    """
    # NOTE(review): the JP2 resolution boxes are, as far as I can tell,
    # specified in grid points per metre with the vertical fields first and
    # signed exponents. The field order, the unsigned exponent format and the
    # 100 / 2.54 factor used here should be confirmed against ISO/IEC 15444-1
    # before relying on the returned values. Behaviour is left unchanged.
    hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
    hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
    vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
    return hdpi, vdpi
|
||||
|
||||
|
||||
|
@ -85,13 +84,13 @@ def parse_jp2h(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"ihdr":
|
||||
width, height = parse_ihdr(boxContents)
|
||||
width, height, channels, bpp = parse_ihdr(boxContents)
|
||||
elif boxType == b"colr":
|
||||
colorspace = parse_colr(boxContents)
|
||||
elif boxType == b"res ":
|
||||
hdpi, vdpi = parse_res(boxContents)
|
||||
byteStart = byteEnd
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsejp2(data):
|
||||
|
@ -102,7 +101,9 @@ def parsejp2(data):
|
|||
while byteStart < noBytes and boxLengthValue != 0:
|
||||
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
|
||||
if boxType == b"jp2h":
|
||||
width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
|
||||
width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
|
||||
boxContents
|
||||
)
|
||||
break
|
||||
byteStart = byteEnd
|
||||
if not width:
|
||||
|
@ -112,13 +113,41 @@ def parsejp2(data):
|
|||
if not colorspace:
|
||||
raise Exception("no colorspace in jp2 header")
|
||||
# retrieving the dpi is optional so we do not error out if not present
|
||||
return (width, height, colorspace, hdpi, vdpi)
|
||||
return (width, height, colorspace, hdpi, vdpi, channels, bpp)
|
||||
|
||||
|
||||
def parsej2k(data):
    """Parse the size header of a raw JPEG 2000 codestream.

    Bytes 4..41 of *data* are decoded as the fixed part of the size header,
    followed by one three-byte entry per component starting at offset 42.
    Only codestreams with exactly three components whose first per-component
    byte equals 7 are accepted (presumably 8-bit unsigned samples, matching
    the hard-coded depth of 8 that is returned).

    Returns a tuple shaped like the result of parsejp2():
    ``(width, height, None, None, None, channels, 8)`` where width and height
    are the image extent minus the image offset. Colorspace and resolution
    are not available from this header and are returned as None.

    Raises struct.error if *data* is truncated and ValueError if the
    component layout is not supported.
    """
    _, _, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
        ">HHIIIIIIIIH", data[4:42]
    )
    # only the first byte of each per-component triple is needed
    ssiz = [struct.unpack_from("BBB", data, 42 + 3 * i)[0] for i in range(csiz)]
    # explicit check instead of assert so that it also runs under python -O
    if ssiz != [7, 7, 7]:
        raise ValueError(
            "unsupported j2k component layout: expected [7, 7, 7], got %r" % ssiz
        )
    return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
|
||||
|
||||
|
||||
def parse(data):
    """Parse JPEG 2000 data with the parser matching its leading bytes.

    Data starting with the four bytes ff 4f ff 51 is handed to parsej2k(),
    everything else to parsejp2(). The chosen parser's result is returned
    unchanged.
    """
    starts_with_codestream_marker = data[:4] == b"\xff\x4f\xff\x51"
    parser = parsej2k if starts_with_codestream_marker else parsejp2
    return parser(data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Command line helper: parse the file named by the first argument and
    # print the extracted values. The file is read in binary mode and closed
    # again before parsing results are printed.
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace, hdpi, vdpi, channels, bpp = parse(f.read())
    print("width = %d" % width)
    print("height = %d" % height)
    print("colorspace = %s" % colorspace)
    print("hdpi = %s" % hdpi)
    print("vdpi = %s" % vdpi)
    print("channels = %s" % channels)
    print("bpp = %s" % bpp)
|
||||
|
|
BIN
src/tests/input/mono.jb2
Normal file
BIN
src/tests/input/mono.jb2
Normal file
Binary file not shown.
BIN
src/tests/output/mono.jb2.pdf
Normal file
BIN
src/tests/output/mono.jb2.pdf
Normal file
Binary file not shown.
Loading…
Reference in a new issue