From b25429a4c11bd62171066e8b9caf1b80c610ee58 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Marin Rodrigues Date: Sun, 11 Jun 2023 07:48:50 +0200 Subject: [PATCH] src/img2pdf_test.py: add tests for timestamps --- src/img2pdf_test.py | 119 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index 55c3b05..f89fa74 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -19,6 +19,8 @@ from packaging.version import parse as parse_version import warnings import json import pathlib +import itertools +import xml.etree.ElementTree as ET img2pdfprog = os.getenv("img2pdfprog", default="src/img2pdf.py") @@ -37,6 +39,14 @@ for glob in ICC_PROFILE_PATHS: ICC_PROFILE = path break +HAVE_FAKETIME = True +try: + ver = subprocess.check_output(["faketime", "--version"]) + if b"faketime: Version " not in ver: + HAVE_FAKETIME = False +except FileNotFoundError: + HAVE_FAKETIME = False + HAVE_MUTOOL = True try: ver = subprocess.check_output(["mutool", "-v"], stderr=subprocess.STDOUT) @@ -130,6 +140,25 @@ psnr_re = re.compile(rb"((?:inf|(?:0|[1-9][0-9]*)(?:\.[0-9]+)?))(?: \([0-9.]+\)) ############################################################################### +# Interpret a datetime string in a given timezone and format it according to a +# given format string in UTC. +# We avoid using the Python datetime module for this job because doing so would +# just replicate the code we want to test for correctness. 
+def tz2utcstrftime(string, fmt, timezone): + return ( + subprocess.check_output( + [ + "date", + "--utc", + f'--date=TZ="{timezone}" {string}', + f"+{fmt}", + ] + ) + .decode("utf8") + .removesuffix("\n") + ) + + def find_closest_palette_color(color, palette): if color.ndim == 0: idx = (numpy.abs(palette - color)).argmin() @@ -6913,6 +6942,96 @@ def general_input(request): return request.param +@pytest.mark.skipif(not HAVE_FAKETIME, reason="requires faketime") +@pytest.mark.parametrize( + "engine,testdata,timezone,pdfa", + itertools.product( + ["internal", "pikepdf"], + ["2021-02-05 17:49:00"], + ["Europe/Berlin", "GMT+12"], + [True, False], + ), +) +def test_faketime(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa): + expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone) + out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf" + subprocess.check_call( + ["env", f"TZ={timezone}", "faketime", "-f", testdata, img2pdfprog] + + (["--pdfa"] if pdfa else []) + + [ + "--producer=", + "--engine=" + engine, + "--output=" + str(out_pdf), + str(jpg_img), + ] + ) + with pikepdf.open(str(out_pdf)) as p: + assert p.docinfo.CreationDate == expected + assert p.docinfo.ModDate == expected + if pdfa: + assert p.Root.Metadata.Subtype == "/XML" + assert p.Root.Metadata.Type == "/Metadata" + expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone) + root = ET.fromstring(p.Root.Metadata.read_bytes()) + for k in ["ModifyDate", "CreateDate"]: + assert ( + root.find( + f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"} + ).text + == expected + ) + out_pdf.unlink() + + +@pytest.mark.parametrize( + "engine,testdata,timezone,pdfa", + itertools.product( + ["internal", "pikepdf"], + [ + "2021-02-05 17:49:00", + "2021-02-05T17:49:00", + "Fri, 05 Feb 2021 17:49:00 +0100", + "last year 12:00", + ], + ["Europe/Berlin", "GMT+12"], + [True, False], + ), +) +def test_date(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa): + # we use the date utility 
to convert the timestamp from the local + # timezone into UTC with the format used by PDF + expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone) + out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf" + subprocess.check_call( + ["env", f"TZ={timezone}", img2pdfprog] + + (["--pdfa"] if pdfa else []) + + [ + f"--moddate={testdata}", + f"--creationdate={testdata}", + "--producer=", + "--engine=" + engine, + "--output=" + str(out_pdf), + str(jpg_img), + ] + ) + with pikepdf.open(str(out_pdf)) as p: + assert p.docinfo.CreationDate == expected + assert p.docinfo.ModDate == expected + if pdfa: + assert p.Root.Metadata.Subtype == "/XML" + assert p.Root.Metadata.Type == "/Metadata" + expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone) + root = ET.fromstring(p.Root.Metadata.read_bytes()) + for k in ["ModifyDate", "CreateDate"]: + assert ( + root.find( + f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"} + ).text + == expected + ) + out_pdf.unlink() + + @pytest.mark.parametrize("engine", ["internal", "pikepdf"]) def test_general(general_input, engine): inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)