Compare commits

..

3 commits

Author SHA1 Message Date
512478ac4e Fix format of PDF and XMP timezone offsets
The syntax for timezone offsets differs between the PDF and XMP
specs, so hardcode the UTC offset instead of using `%z`. This also
avoids the possibility of embedding the more granular timezone offsets
that the datetime module supports but that neither the PDF nor the XMP
format supports.
2023-06-10 17:36:47 -07:00
9c3753ed2b Convert manual creation/mod times to UTC as well
For consistency with the previous commit, also convert the date
strings from --creationdate and --moddate to UTC if a timezone
is specified.
2023-06-10 17:26:13 -07:00
1dd05cc36b Treat default creation/mod dates as UTC (fixes #155)
(Tested with Python 3.11.3 on Arch Linux.)

Without passing a tzinfo object to `datetime.now()`, a "naive" datetime
object is created, which is not timezone-aware. To fix the default
date/time detection for non-UTC local timezones, pass
`datetime.timezone.utc` to convert the value to UTC and make the
datetime object "aware".

Also, adjust the strftime() wrappers to emit the UTC offset instead of a
literal `Z`. A trailing literal `Z` appears to be valid ISO 8601, but
for some reason the resulting date does not convert successfully,
whereas the `%z` placeholder, which substitutes the UTC offset, does
convert successfully.
2023-05-29 14:17:56 -07:00
2 changed files with 22 additions and 43 deletions

View file

@ -22,7 +22,7 @@ import sys
import os
import zlib
import argparse
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
from PIL import Image, TiffImagePlugin, GifImagePlugin
if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@ -36,7 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS
from datetime import datetime, timezone
from datetime import datetime
from datetime import timezone
from jp2 import parsejp2
from enum import Enum
from io import BytesIO
@ -46,7 +47,6 @@ import platform
import hashlib
from itertools import chain
import re
import io
logger = logging.getLogger(__name__)
@ -722,7 +722,8 @@ class pdfdoc(object):
self.writer.docinfo = PdfDict(indirect=True)
def datetime_to_pdfdate(dt):
return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
dt_utc = dt.astimezone(tz=timezone.utc)
return dt_utc.strftime("%Y%m%d%H%M%S+00'00'")
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()]
@ -752,7 +753,8 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
dt_utc = dt.astimezone(tz=timezone.utc)
return dt_utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
@ -1432,21 +1434,6 @@ def get_imgmetadata(
iccp = None
if "icc_profile" in imgdata.info:
iccp = imgdata.info.get("icc_profile")
# GIMP saves bilevel tiff images with an RGB ICC profile which is useless
# and produces an error in Adobe Acrobat, so we ignore it with a warning.
# imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
# images: https://github.com/ImageMagick/ImageMagick/issues/2070
if iccp is not None and color == Colorspace["1"] and imgformat == ImageFormat.TIFF:
with io.BytesIO(iccp) as f:
prf = ImageCms.ImageCmsProfile(f)
if (
prf.profile.model == "sRGB"
and prf.profile.manufacturer == "GIMP"
and prf.profile.profile_description == "GIMP built-in sRGB"
):
logger.warning("Ignoring RGB ICC profile in bilevel TIFF produced by GIMP.")
logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
iccp = None
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
@ -2101,16 +2088,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
)
else:
if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
# PDF does not support palette images with icc profile
if color == Colorspace.P:
newcolor = Colorspace.RGB
newimg = newimg.convert(mode="RGB")
elif color == Colorspace.PA:
newcolor = Colorspace.RGBA
newimg = newimg.convert(mode="RGBA")
smaskidat = None
elif (
if (
color == Colorspace.RGBA
or color == Colorspace.LA
or color == Colorspace.PA
@ -2124,11 +2102,6 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
newcolor = color
l, a = newimg.split()
newimg = l
elif color == Colorspace.PA or (
color == Colorspace.P and "transparency" in newimg.info
):
newcolor = color
a = newimg.convert(mode="RGBA").split()[-1]
else:
newcolor = Colorspace.RGBA
r, g, b, a = newimg.convert(mode="RGBA").split()
@ -2139,6 +2112,15 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
"Image contains an alpha channel. Computing a separate "
"soft mask (/SMask) image to store transparency in PDF."
)
elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
# PDF does not support palette images with icc profile
if color == Colorspace.P:
newcolor = Colorspace.RGB
newimg = newimg.convert(mode="RGB")
elif color == Colorspace.PA:
newcolor = Colorspace.RGBA
newimg = newimg.convert(mode="RGBA")
smaskidat = None
else:
newcolor = color
smaskidat = None
@ -3732,9 +3714,7 @@ Paper sizes:
the value in the second column has the same effect as giving the short hand
in the first column. Appending ^T (a caret/circumflex followed by the letter
T) turns the paper size from portrait into landscape. The postfix thus
symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
the escape character, so you need to put quotes around the option value.
The values are case insensitive.
symbolizes the transpose. The values are case insensitive.
%s
@ -3801,7 +3781,7 @@ Examples:
while preserving its aspect ratio and a print border of 2 cm on the top and
bottom and 2.5 cm on the left and right hand side.
$ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
$ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
original image size if the image is smaller than that.
@ -4271,7 +4251,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
print(
"Reading image from standard input...\n"
"Re-run with -h or --help for usage information.",
file=sys.stderr,
file=sys.stderr
)
try:
images = [sys.stdin.buffer.read()]

View file

@ -4276,10 +4276,9 @@ def gif_transparent_pdf(tmp_path_factory, gif_transparent_img, request):
== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
)
assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/Indexed"
assert p.pages[0].Resources.XObject.Im0.ColorSpace[1] == "/DeviceRGB"
assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceRGB"
assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 1
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15
assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
assert p.pages[0].Resources.XObject.Im0.Height == 60