From b25429a4c11bd62171066e8b9caf1b80c610ee58 Mon Sep 17 00:00:00 2001
From: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
Date: Sun, 11 Jun 2023 07:48:50 +0200
Subject: [PATCH] src/img2pdf_test.py: add tests for timestamps

---
 src/img2pdf_test.py | 119 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py
index 55c3b05..f89fa74 100755
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
@@ -19,6 +19,8 @@ from packaging.version import parse as parse_version
 import warnings
 import json
 import pathlib
+import itertools
+import xml.etree.ElementTree as ET
 
 img2pdfprog = os.getenv("img2pdfprog", default="src/img2pdf.py")
 
@@ -37,6 +39,14 @@ for glob in ICC_PROFILE_PATHS:
             ICC_PROFILE = path
             break
 
+HAVE_FAKETIME = True
+try:
+    ver = subprocess.check_output(["faketime", "--version"])
+    if b"faketime: Version " not in ver:
+        HAVE_FAKETIME = False
+except FileNotFoundError:
+    HAVE_FAKETIME = False
+
 HAVE_MUTOOL = True
 try:
     ver = subprocess.check_output(["mutool", "-v"], stderr=subprocess.STDOUT)
@@ -130,6 +140,25 @@ psnr_re = re.compile(rb"((?:inf|(?:0|[1-9][0-9]*)(?:\.[0-9]+)?))(?: \([0-9.]+\))
 ###############################################################################
 
 
+# Interpret a datetime string in a given timezone and format it according to a
+# given format string in UTC.
+# We avoid using the Python datetime module for this job because doing so would
+# just replicate the code we want to test for correctness.
+def tz2utcstrftime(string, fmt, timezone):
+    return (
+        subprocess.check_output(
+            [
+                "date",
+                "--utc",  # emit the result in UTC
+                f'--date=TZ="{timezone}" {string}',  # read string in timezone
+                f"+{fmt}",  # strftime-style output format
+            ]
+        )
+        .decode("utf8")
+        .removesuffix("\n")  # strip one trailing newline, if present
+    )
+
+
 def find_closest_palette_color(color, palette):
     if color.ndim == 0:
         idx = (numpy.abs(palette - color)).argmin()
@@ -6913,6 +6942,96 @@ def general_input(request):
     return request.param
 
 
+@pytest.mark.skipif(not HAVE_FAKETIME, reason="requires faketime")
+@pytest.mark.parametrize(
+    "engine,testdata,timezone,pdfa",
+    itertools.product(  # every combination of the four lists below
+        ["internal", "pikepdf"],
+        ["2021-02-05 17:49:00"],
+        ["Europe/Berlin", "GMT+12"],
+        [True, False],
+    ),
+)
+def test_faketime(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa):
+    expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone)
+    out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf"
+    subprocess.check_call(  # run img2pdf under faketime with TZ set
+        ["env", f"TZ={timezone}", "faketime", "-f", testdata, img2pdfprog]
+        + (["--pdfa"] if pdfa else [])
+        + [
+            "--producer=",  # pass an empty producer value
+            "--engine=" + engine,
+            "--output=" + str(out_pdf),
+            str(jpg_img),
+        ]
+    )
+    with pikepdf.open(str(out_pdf)) as p:  # inspect the generated PDF
+        assert p.docinfo.CreationDate == expected  # faked time, in UTC
+        assert p.docinfo.ModDate == expected
+        if pdfa:  # with --pdfa, also check the dates in the metadata XML
+            assert p.Root.Metadata.Subtype == "/XML"
+            assert p.Root.Metadata.Type == "/Metadata"
+            expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone)
+            root = ET.fromstring(p.Root.Metadata.read_bytes())
+            for k in ["ModifyDate", "CreateDate"]:  # elements in the xmp namespace
+                assert (
+                    root.find(
+                        f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"}
+                    ).text
+                    == expected
+                )
+    out_pdf.unlink()  # delete the generated PDF
+
+
+@pytest.mark.parametrize(
+    "engine,testdata,timezone,pdfa",
+    itertools.product(  # every combination of the four lists below
+        ["internal", "pikepdf"],
+        [
+            "2021-02-05 17:49:00",
+            "2021-02-05T17:49:00",
+            "Fri, 05 Feb 2021 17:49:00 +0100",
+            "last year 12:00",
+        ],
+        ["Europe/Berlin", "GMT+12"],
+        [True, False],
+    ),
+)
+def test_date(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa):
+    # we use the date utility to convert the timestamp from the local
+    # timezone into UTC with the format used by PDF
+    expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone)
+    out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf"
+    subprocess.check_call(  # run img2pdf with TZ set and explicit dates
+        ["env", f"TZ={timezone}", img2pdfprog]
+        + (["--pdfa"] if pdfa else [])
+        + [
+            f"--moddate={testdata}",
+            f"--creationdate={testdata}",
+            "--producer=",  # pass an empty producer value
+            "--engine=" + engine,
+            "--output=" + str(out_pdf),
+            str(jpg_img),
+        ]
+    )
+    with pikepdf.open(str(out_pdf)) as p:  # inspect the generated PDF
+        assert p.docinfo.CreationDate == expected  # given date, in UTC
+        assert p.docinfo.ModDate == expected
+        if pdfa:  # with --pdfa, also check the dates in the metadata XML
+            assert p.Root.Metadata.Subtype == "/XML"
+            assert p.Root.Metadata.Type == "/Metadata"
+            expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone)
+            root = ET.fromstring(p.Root.Metadata.read_bytes())
+            for k in ["ModifyDate", "CreateDate"]:  # elements in the xmp namespace
+                assert (
+                    root.find(
+                        f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"}
+                    ).text
+                    == expected
+                )
+    out_pdf.unlink()  # delete the generated PDF
+
+
 @pytest.mark.parametrize("engine", ["internal", "pikepdf"])
 def test_general(general_input, engine):
     inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)