1
0
Fork 0

Address discrepancies between PDF and XMP timestamps

The PDF format and XMP metadata specs define different syntaxes for dates,
so account for these discrepancies by post-processing the strftime() output
to construct the final timestamps more carefully.
Patrick McCarty 12 months ago
parent 1dd05cc36b
commit 5a414ce4e4

@ -721,8 +721,16 @@ class pdfdoc(object):
if engine != Engine.pikepdf:
self.writer.docinfo = PdfDict(indirect=True)
timezone_regex = r'^([+-])([0-9]{2})([0-9]{2})([0-9.]+)?$'
def datetime_to_pdfdate(dt):
# Format `dt` as a PDF date string: the strftime("%Y%m%d%H%M%S") timestamp
# followed by the UTC offset rewritten from `%z` form into [+-]HH'MM'.
# NOTE(review): this span is an unmarked diff hunk. Going by the hunk header
# (-721,8 +721,16), the `return` directly below looks like the removed
# pre-commit body and the lines after it the added replacement -- confirm
# against the repository before treating this as one function body.
return dt.strftime("%Y%m%d%H%M%S%z")
time_no_tz = dt.strftime("%Y%m%d%H%M%S")
# Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
# PDF format only accepts the [+-]HHMM part, and it must be
# formatted as [+-]HH'MM'.
tz = dt.strftime("%z")
# The replacement keeps only groups 1-3 of timezone_regex (sign, hours,
# minutes); the optional fourth group (seconds/fraction) is dropped.
# re.sub returns `tz` unchanged when the pattern does not match.
tz_pdf = re.sub(timezone_regex, r"\1\2'\3'", tz)
return time_no_tz + tz_pdf
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()]
@ -752,7 +760,13 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
# Format `dt` as an XMP date string: the strftime("%Y-%m-%dT%H:%M:%S")
# timestamp followed by the UTC offset rewritten from `%z` form into
# [+-]HH:MM.
# NOTE(review): this span is an unmarked diff hunk. Going by the hunk header
# (-752,7 +760,13), the `return` directly below looks like the removed
# pre-commit body and the lines after it the added replacement -- confirm
# against the repository before treating this as one function body.
return dt.strftime("%Y-%m-%dT%H:%M:%S%z")
time_no_tz = dt.strftime("%Y-%m-%dT%H:%M:%S")
# Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
# XMP metadata only accepts the [+-]HHMM part, and it must be
# formatted as [+-]HH:MM.
tz = dt.strftime("%z")
# The replacement keeps only groups 1-3 of timezone_regex (sign, hours,
# minutes); the optional fourth group (seconds/fraction) is dropped.
# re.sub returns `tz` unchanged when the pattern does not match.
tz_xmp = re.sub(timezone_regex, r'\1\2:\3', tz)
return time_no_tz + tz_xmp
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>

Loading…
Cancel
Save