Compare commits

..

7 commits

Author SHA1 Message Date
81502f21af Convert creation/modification dates to UTC (fixes #155)
Ensure that timezones are correctly interpreted in the input by calling
`.astimezone()` as appropriate on datetime objects, and store the
resulting date fields as UTC.

One could argue that datetimes in the local timezone should be stored in the
PDF, but then the date string handling becomes more complicated; the PDF
and XMP date specs both use the `Z` suffix to indicate UTC time, but
other +/- offsets require different syntax between the two specs.
2023-06-10 17:53:03 -07:00
0cbcb8fa12
avoid converting palette PNG with alpha to RGB (closes: #158) 2023-06-08 08:54:37 +02:00
e9e04b6dd9
extend comments around dropping ICC profile stored by GIMP for bilevel input 2023-06-08 08:53:22 +02:00
fc059ee471
use quotes around caret in examples for windows users
Closes: #167
2023-06-08 07:14:17 +02:00
25466113e9
another small fixup for the last commit 2023-05-30 08:06:36 +02:00
7405635b72
only check whether icc profile can be dropped if there is any 2023-05-30 07:10:32 +02:00
aea472101b
strip off RGB color profile from bilevel TIFF images produced by gimp (closes: #164) 2023-05-30 06:25:26 +02:00
2 changed files with 43 additions and 22 deletions

View file

@ -22,7 +22,7 @@ import sys
import os import os
import zlib import zlib
import argparse import argparse
from PIL import Image, TiffImagePlugin, GifImagePlugin from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
if hasattr(GifImagePlugin, "LoadingStrategy"): if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of # Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@ -36,8 +36,7 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True # TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS from PIL.ExifTags import TAGS
from datetime import datetime from datetime import datetime, timezone
from datetime import timezone
from jp2 import parsejp2 from jp2 import parsejp2
from enum import Enum from enum import Enum
from io import BytesIO from io import BytesIO
@ -47,6 +46,7 @@ import platform
import hashlib import hashlib
from itertools import chain from itertools import chain
import re import re
import io
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -722,8 +722,7 @@ class pdfdoc(object):
self.writer.docinfo = PdfDict(indirect=True) self.writer.docinfo = PdfDict(indirect=True)
def datetime_to_pdfdate(dt): def datetime_to_pdfdate(dt):
dt_utc = dt.astimezone(tz=timezone.utc) return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
return dt_utc.strftime("%Y%m%d%H%M%S+00'00'")
for k in ["Title", "Author", "Creator", "Producer", "Subject"]: for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()] v = locals()[k.lower()]
@ -753,8 +752,7 @@ class pdfdoc(object):
) )
def datetime_to_xmpdate(dt): def datetime_to_xmpdate(dt):
dt_utc = dt.astimezone(tz=timezone.utc) return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
return dt_utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?> self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'> <x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
@ -1434,6 +1432,21 @@ def get_imgmetadata(
iccp = None iccp = None
if "icc_profile" in imgdata.info: if "icc_profile" in imgdata.info:
iccp = imgdata.info.get("icc_profile") iccp = imgdata.info.get("icc_profile")
# GIMP saves bilevel TIFF images with an RGB ICC profile, which is useless
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
# ImageMagick also used to (wrongly) include an RGB ICC profile for bilevel
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
if iccp is not None and color == Colorspace["1"] and imgformat == ImageFormat.TIFF:
with io.BytesIO(iccp) as f:
prf = ImageCms.ImageCmsProfile(f)
if (
prf.profile.model == "sRGB"
and prf.profile.manufacturer == "GIMP"
and prf.profile.profile_description == "GIMP built-in sRGB"
):
logger.warning("Ignoring RGB ICC profile in bilevel TIFF produced by GIMP.")
logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
iccp = None
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
@ -2088,7 +2101,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
) )
) )
else: else:
if ( if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
# PDF does not support palette images with icc profile
if color == Colorspace.P:
newcolor = Colorspace.RGB
newimg = newimg.convert(mode="RGB")
elif color == Colorspace.PA:
newcolor = Colorspace.RGBA
newimg = newimg.convert(mode="RGBA")
smaskidat = None
elif (
color == Colorspace.RGBA color == Colorspace.RGBA
or color == Colorspace.LA or color == Colorspace.LA
or color == Colorspace.PA or color == Colorspace.PA
@ -2102,6 +2124,11 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
newcolor = color newcolor = color
l, a = newimg.split() l, a = newimg.split()
newimg = l newimg = l
elif color == Colorspace.PA or (
color == Colorspace.P and "transparency" in newimg.info
):
newcolor = color
a = newimg.convert(mode="RGBA").split()[-1]
else: else:
newcolor = Colorspace.RGBA newcolor = Colorspace.RGBA
r, g, b, a = newimg.convert(mode="RGBA").split() r, g, b, a = newimg.convert(mode="RGBA").split()
@ -2112,15 +2139,6 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
"Image contains an alpha channel. Computing a separate " "Image contains an alpha channel. Computing a separate "
"soft mask (/SMask) image to store transparency in PDF." "soft mask (/SMask) image to store transparency in PDF."
) )
elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
# PDF does not support palette images with icc profile
if color == Colorspace.P:
newcolor = Colorspace.RGB
newimg = newimg.convert(mode="RGB")
elif color == Colorspace.PA:
newcolor = Colorspace.RGBA
newimg = newimg.convert(mode="RGBA")
smaskidat = None
else: else:
newcolor = color newcolor = color
smaskidat = None smaskidat = None
@ -3714,7 +3732,9 @@ Paper sizes:
the value in the second column has the same effect as giving the short hand the value in the second column has the same effect as giving the short hand
in the first column. Appending ^T (a caret/circumflex followed by the letter in the first column. Appending ^T (a caret/circumflex followed by the letter
T) turns the paper size from portrait into landscape. The postfix thus T) turns the paper size from portrait into landscape. The postfix thus
symbolizes the transpose. The values are case insensitive. symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
the escape character, so you need to put quotes around the option value.
The values are case insensitive.
%s %s
@ -3781,7 +3801,7 @@ Examples:
while preserving its aspect ratio and a print border of 2 cm on the top and while preserving its aspect ratio and a print border of 2 cm on the top and
bottom and 2.5 cm on the left and right hand side. bottom and 2.5 cm on the left and right hand side.
$ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg $ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
original image size if the image is smaller than that. original image size if the image is smaller than that.
@ -4251,7 +4271,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
print( print(
"Reading image from standard input...\n" "Reading image from standard input...\n"
"Re-run with -h or --help for usage information.", "Re-run with -h or --help for usage information.",
file=sys.stderr file=sys.stderr,
) )
try: try:
images = [sys.stdin.buffer.read()] images = [sys.stdin.buffer.read()]

View file

@ -4276,9 +4276,10 @@ def gif_transparent_pdf(tmp_path_factory, gif_transparent_img, request):
== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ" == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
) )
assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8 assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceRGB" assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/Indexed"
assert p.pages[0].Resources.XObject.Im0.ColorSpace[1] == "/DeviceRGB"
assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8 assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3 assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 1
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15 assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15
assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode" assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
assert p.pages[0].Resources.XObject.Im0.Height == 60 assert p.pages[0].Resources.XObject.Im0.Height == 60