Compare commits

...

7 commits

4 changed files with 132 additions and 23 deletions

View file

@ -2,6 +2,22 @@
CHANGES CHANGES
======= =======
0.5.1 (2023-11-26)
------------------
- no default ICC profile location for PDF/A-1b on Windows
- workaround for PNG input without dpi units but non-square dpi aspect ratio
0.5.0 (2023-10-28)
------------------
- support MIFF for 16 bit CMYK input
- accept pathlib.Path objects as input
- don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
- thumbnails are no longer included by default and --include-thumbnails has to
be used if you want them
- support for pikepdf (>= 6.2.0)
0.4.4 (2022-04-07) 0.4.4 (2022-04-07)
------------------ ------------------

39
HACKING
View file

@ -27,6 +27,41 @@ Making a new release
- Build and upload to pypi: - Build and upload to pypi:
$ rm dist/* $ rm -rf dist/*
$ python3 setup.py sdist $ python3 setup.py sdist
$ twine upload --sign dist/* $ twine upload dist/*
Using debbisect to find regressions
-----------------------------------
$ debbisect --cache=./cache --depends="git,ca-certificates,python3,
ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil,
python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \
--verbose 2023-09-16 2023-10-24 \
'chroot "$1" sh -c "
git clone https://gitlab.mister-muffin.de/josch/img2pdf.git &&
cd img2pdf &&
pytest \"src/img2pdf_test.py::test_jpg_2000_rgba8[internal]\""'
Using debbisect cache
---------------------
$ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \
--include=git,ca-certificates,python3,ghostscript,imagemagick \
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \
--include=python3-numpy,python3-scipy,python3-pikepdf \
--hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \
--setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \
--setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \
--chrooted-customize-hook=bash \
unstable /dev/null \
file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/
Bisecting imagemagick
---------------------
$ git clean -fdx && git reset --hard
$ ./configure --prefix=$(pwd)/prefix
$ make -j$(nproc)
$ make install
$ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ...

View file

@ -1,7 +1,7 @@
import sys import sys
from setuptools import setup from setuptools import setup
VERSION = "0.4.4" VERSION = "0.5.1"
INSTALL_REQUIRES = ( INSTALL_REQUIRES = (
"Pillow", "Pillow",

View file

@ -22,7 +22,7 @@ import sys
import os import os
import zlib import zlib
import argparse import argparse
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms, ExifTags
if hasattr(GifImagePlugin, "LoadingStrategy"): if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of # Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@ -62,7 +62,7 @@ try:
except ImportError: except ImportError:
have_pikepdf = False have_pikepdf = False
__version__ = "0.4.4" __version__ = "0.5.1"
default_dpi = 96.0 default_dpi = 96.0
papersizes = { papersizes = {
"letter": "8.5inx11in", "letter": "8.5inx11in",
@ -1298,6 +1298,7 @@ class pdfdoc(object):
def get_imgmetadata( def get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None
): ):
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
# this codepath gets called if the PIL installation is not able to # this codepath gets called if the PIL installation is not able to
# handle JPEG2000 files # handle JPEG2000 files
@ -1311,7 +1312,37 @@ def get_imgmetadata(
else: else:
imgwidthpx, imgheightpx = imgdata.size imgwidthpx, imgheightpx = imgdata.size
ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi)) ndpi = None
# For JPEG images with both EXIF and JFIF tags, Pillow appears to read the image resolution from JFIF.
# However, "Preview" on macOS and "Photos" on Windows read the resolution from EXIF,
# so we try to read the value from EXIF first.
exif = imgdata.getexif()
if exif:
exif_res_unit = exif.get(ExifTags.Base.ResolutionUnit)
exif_x_res = exif.get(ExifTags.Base.XResolution)
exif_y_res = exif.get(ExifTags.Base.YResolution)
if exif_x_res and exif_y_res:
if (exif_res_unit == 3): # cm
ndpi = (exif_x_res * 2.54, exif_y_res * 2.54)
else:
ndpi = (exif_x_res, exif_y_res)
# if no DPI from EXIF, get it from `info`
if ndpi is None:
ndpi = imgdata.info.get("dpi")
if ndpi is None:
# the PNG plugin of PIL adds the undocumented "aspect" field instead of
# the "dpi" field if the PNG pHYs chunk unit is not set to meters
if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
aspect = imgdata.info["aspect"]
# make sure not to go below the default dpi
if aspect[0] > aspect[1]:
ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
else:
ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
else:
ndpi = (default_dpi, default_dpi)
# In python3, the returned dpi value for some tiff images will # In python3, the returned dpi value for some tiff images will
# not be an integer but a float. To make the behaviour of # not be an integer but a float. To make the behaviour of
# img2pdf the same between python2 and python3, we convert that # img2pdf the same between python2 and python3, we convert that
@ -1489,10 +1520,8 @@ def get_imgmetadata(
with io.BytesIO(iccp) as f: with io.BytesIO(iccp) as f:
prf = ImageCms.ImageCmsProfile(f) prf = ImageCms.ImageCmsProfile(f)
if prf.profile.xcolor_space not in ('GRAY'): if prf.profile.xcolor_space not in ("GRAY"):
logger.warning( logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
"Ignoring non-GRAY ICC profile in Grayscale JPG"
)
iccp = None iccp = None
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
@ -3816,13 +3845,30 @@ def gui():
app.mainloop() app.mainloop()
def file_is_icc(fname):
    """Report whether the file at ``fname`` looks like an ICC profile.

    Only the first 40 bytes are read. The file is accepted when those 40
    bytes are all present and bytes 36..39 equal ``b"acsp"``; a shorter
    file yields ``False``.

    Errors from opening or reading the file are not handled here and
    propagate to the caller.
    """
    header_size = 40
    with open(fname, "rb") as stream:
        header = stream.read(header_size)
    # A truncated header can never contain the marker at offset 36.
    return len(header) >= header_size and header[36:] == b"acsp"
def validate_icc(fname):
    """Check that ``fname`` names an ICC profile and return it unchanged.

    Used as the ``type=`` callable of the ``--pdfa`` command line option.

    Raises:
        argparse.ArgumentTypeError: if the file cannot be opened or read,
            or if ``file_is_icc`` rejects it.
    """
    try:
        is_icc = file_is_icc(fname)
    except OSError as err:
        # file_is_icc() opens the path, so a missing or unreadable file
        # raises OSError. argparse only converts ArgumentTypeError,
        # TypeError and ValueError into a usage message; anything else
        # would reach the user as a traceback.
        raise argparse.ArgumentTypeError(
            'cannot read ICC profile "%s": %s' % (fname, err)
        ) from err
    if not is_icc:
        raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
    return fname
def get_default_icc_profile(): def get_default_icc_profile():
for profile in [ for profile in [
"/usr/share/color/icc/sRGB.icc", "/usr/share/color/icc/sRGB.icc",
"/usr/share/color/icc/OpenICC/sRGB.icc", "/usr/share/color/icc/OpenICC/sRGB.icc",
"/usr/share/color/icc/colord/sRGB.icc", "/usr/share/color/icc/colord/sRGB.icc",
]: ]:
if os.path.exists(profile): if not os.path.exists(profile):
continue
if not file_is_icc(profile):
continue
return profile return profile
return "/usr/share/color/icc/sRGB.icc" return "/usr/share/color/icc/sRGB.icc"
@ -4094,11 +4140,23 @@ RGB.""",
% Image.MAX_IMAGE_PIXELS, % Image.MAX_IMAGE_PIXELS,
) )
if sys.platform == "win32":
# on Windows, there are no default paths to search for an ICC profile
# so make the argument required instead of optional
outargs.add_argument(
"--pdfa",
type=validate_icc,
help="Output a PDF/A-1b compliant document. The argument to this "
"option is the path to the ICC profile that will be embedded into "
"the resulting PDF.",
)
else:
outargs.add_argument( outargs.add_argument(
"--pdfa", "--pdfa",
nargs="?", nargs="?",
const=get_default_icc_profile(), const=get_default_icc_profile(),
default=None, default=None,
type=validate_icc,
help="Output a PDF/A-1b compliant document. By default, this will " help="Output a PDF/A-1b compliant document. By default, this will "
"embed either /usr/share/color/icc/sRGB.icc, " "embed either /usr/share/color/icc/sRGB.icc, "
"/usr/share/color/icc/OpenICC/sRGB.icc or " "/usr/share/color/icc/OpenICC/sRGB.icc or "