Compare commits

...

2 commits

Author SHA1 Message Date
5100507403 Address discrepancies between PDF and XMP timestamps
The PDF format and XMP metadata specs define different syntaxes for dates,
so account for these discrepancies by post-processing the strftime()
output when constructing the final timestamps.
2023-05-29 19:55:05 -07:00
1dd05cc36b Treat default creation/mod dates as UTC (fixes #155)
(Tested with Python 3.11.3 on Arch Linux.)

Without passing a tzinfo object to `datetime.now()`, a "naive" datetime
object is created, which is not timezone-aware. To fix the default
date/time detection for non-UTC local timezones, pass
`datetime.timezone.utc` to convert the value to UTC and make the
datetime object "aware".

Also, adjust the strftime() wrappers to use the UTC offsets instead of a
literal `Z`; using the literal `Z` at the end appears to be valid for
ISO 8601, but for some reason it does not successfully convert, whereas
the `%z` placeholder substitutes the UTC offset and successfully
converts.
2023-05-29 14:17:56 -07:00

View file

@ -37,6 +37,7 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS
from datetime import datetime
from datetime import timezone
from jp2 import parsejp2
from enum import Enum
from io import BytesIO
@ -721,7 +722,16 @@ class pdfdoc(object):
self.writer.docinfo = PdfDict(indirect=True)
def datetime_to_pdfdate(dt):
    """Format *dt* as the body of a PDF date string.

    Returns ``YYYYMMDDHHMMSS`` followed by the UTC offset written as
    ``[+-]HH'MM'`` when *dt* is timezone-aware. A naive datetime yields
    no offset suffix. No ``D:`` prefix is added here.
    """
    time_no_tz = dt.strftime("%Y%m%d%H%M%S")
    # Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
    # PDF format only accepts the [+-]HHMM part, and it must be
    # formatted as [+-]HH'MM'. Note that PDF 1.7 removed the need for
    # the trailing apostrophe (after MM), but earlier specs require it.
    tz = dt.strftime("%z")
    if not tz:
        # Naive datetime: `%z` expands to an empty string, so there is
        # no offset to append.
        return time_no_tz
    # tz[:3] is the sign plus HH, tz[3:5] is MM; any seconds or
    # microseconds component of the offset is deliberately dropped.
    return "%s%s'%s'" % (time_no_tz, tz[:3], tz[3:5])
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()]
@ -731,7 +741,7 @@ class pdfdoc(object):
v = PdfString.encode(v)
self.writer.docinfo[getattr(PdfName, k)] = v
now = datetime.now()
now = datetime.now(tz=timezone.utc)
for k in ["CreationDate", "ModDate"]:
v = locals()[k.lower()]
if v is None and nodate:
@ -751,7 +761,15 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
    """Format *dt* as an XMP metadata timestamp.

    Returns ``YYYY-MM-DDTHH:MM:SS`` followed by the UTC offset written
    as ``[+-]HH:MM`` when *dt* is timezone-aware. A naive datetime
    yields no offset suffix.
    """
    time_no_tz = dt.strftime("%Y-%m-%dT%H:%M:%S")
    # Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
    # XMP metadata only accepts the [+-]HHMM part, and it must be
    # formatted as [+-]HH:MM.
    tz = dt.strftime("%z")
    if not tz:
        # Naive datetime: `%z` expands to an empty string, so there is
        # no offset to append.
        return time_no_tz
    # tz[:3] is the sign plus HH, tz[3:5] is MM; any seconds or
    # microseconds component of the offset is deliberately dropped.
    return "%s%s:%s" % (time_no_tz, tz[:3], tz[3:5])
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>