From 51005074038a705211c211f1f2a5bb5ec17371dd Mon Sep 17 00:00:00 2001
From: Patrick McCarty
Date: Mon, 29 May 2023 17:23:24 -0700
Subject: [PATCH] Address discrepancies between PDF and XMP timestamps

The PDF format and XMP metadata specs define different syntax for dates,
so account for these discrepancies by more carefully constructing the
final timestamps by post-processing strftime() output.
---
 src/img2pdf.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index 4a1cd37..b901af5 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -722,7 +722,16 @@ class pdfdoc(object):
             self.writer.docinfo = PdfDict(indirect=True)
 
         def datetime_to_pdfdate(dt):
-            return dt.strftime("%Y%m%d%H%M%S%z")
+            time_no_tz = dt.strftime("%Y%m%d%H%M%S")
+            tz_pdf = ""
+            # Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
+            # PDF format only accepts the [+-]HHMM part, and it must be
+            # formatted as [+-]HH'MM'. Note that PDF 1.7 removed the need for
+            # the trailing apostrophe (after MM), but earlier specs require it.
+            tz = dt.strftime("%z")
+            if tz:
+                tz_pdf = "%s%s'%s'" % (tz[0], tz[1:3], tz[3:5])
+            return time_no_tz + tz_pdf
 
         for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
             v = locals()[k.lower()]
@@ -752,7 +761,15 @@ class pdfdoc(object):
                 )
 
         def datetime_to_xmpdate(dt):
-            return dt.strftime("%Y-%m-%dT%H:%M:%S%z")
+            time_no_tz = dt.strftime("%Y-%m-%dT%H:%M:%S")
+            tz_xmp = ""
+            # Format for `%z` specifier is [+-]HHMM(SS(\.ffffff)?)?, but the
+            # XMP metadata only accepts the [+-]HHMM part, and it must be
+            # formatted as [+-]HH:MM.
+            tz = dt.strftime("%z")
+            if tz:
+                tz_xmp = "%s%s:%s" % (tz[0], tz[1:3], tz[3:5])
+            return time_no_tz + tz_xmp
 
         self.xmp = b"""