From 7afbdd892524f68c48863b7cc222f447dd60aa59 Mon Sep 17 00:00:00 2001
From: mara004 <geisserml@gmail.com>
Date: Wed, 28 Aug 2024 00:32:10 +0200
Subject: [PATCH] Factor out from convert() too, adapt pdfdoc

---
 src/img2pdf.py      | 42 ++++++++++++++++++++++--------------------
 src/img2pdf_test.py |  5 +++--
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index f27e0f5..5379089 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -1075,7 +1075,7 @@ class pdfdoc(object):
         self.tostream(stream)
         return stream.getvalue()
 
-    def todoc(self):
+    def finalize(self):
         if self.engine == Engine.pikepdf:
             PdfArray = pikepdf.Array
             PdfDict = pikepdf.Dictionary
@@ -1267,11 +1267,9 @@ class pdfdoc(object):
                 self.writer.addobj(metadata)
                 self.writer.addobj(iccstream)
 
-        return self.writer, self.output_version
-
     def tostream(self, outputstream):
         # write out the PDF
-        self.todoc()  # finalize self.writer
+        # this assumes that finalize() has been invoked beforehand by the caller
         if self.engine == Engine.pikepdf:
             kwargs = {}
             if pikepdf.__version__ >= "6.2.0":
@@ -2610,14 +2608,11 @@ def find_scale(pagewidth, pageheight):
     return 10 ** ceil(log10(oversized))
 
 
-# given one or more input image, depending on outputstream, either return a
-# string containing the whole PDF if outputstream is None or write the PDF
-# data to the given file-like object and return None
-#
-# Input images can be given as file like objects (they must implement read()),
-# as a binary string representing the image content or as filenames to the
-# images.
-def convert(*images, **kwargs):
+# Convert the image(s) to a `pdfdoc` object.
+# The `.writer` attribute holds the underlying engine document handle, and
+# `.output_version` the minimum version the caller should use when saving.
+# The main convert() wraps this implementation function.
+def convert_to_docobject(*images, **kwargs):
     _default_kwargs = dict(
         engine=None,
         title=None,
@@ -2638,8 +2633,6 @@ def convert(*images, **kwargs):
         viewer_fit_window=False,
         viewer_center_window=False,
         viewer_fullscreen=False,
-        outputstream=None,
-        return_engine_doc=False,
         first_frame_only=False,
         allow_oversized=True,
         cropborder=None,
@@ -2802,13 +2795,22 @@ def convert(*images, **kwargs):
                 iccp,
             )
 
-    if kwargs["outputstream"]:
-        pdf.tostream(kwargs["outputstream"])
+    pdf.finalize()
+    return pdf
+
+
+# Given one or more input images, depending on outputstream, either return a
+# string containing the whole PDF if outputstream is None or write the PDF
+# data to the given file-like object and return None.
+#
+# Input images can be given as file-like objects (they must implement read()),
+# as a binary string representing the image content, or as filenames of the
+# images.
+def convert(*images, outputstream=None, **kwargs):
+    pdf = convert_to_docobject(*images, **kwargs)
+    if outputstream:
+        pdf.tostream(outputstream)
         return
-
-    if kwargs["return_engine_doc"]:
-        return pdf.todoc()
-
     return pdf.tostring()
 
 
diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py
index a44cfe7..f7e80f9 100755
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
@@ -7149,9 +7149,10 @@ def test_general(general_input, engine):
 def test_return_engine_doc(tmp_path_factory):
     inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg")
     outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf"
-    pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf)
+    pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf)
+    pdf = pdf_wrapper.writer  # engine-level document handle (pikepdf.Pdf here)
     assert isinstance(pdf, pikepdf.Pdf)
-    pdf.save(outputf, min_version=min_version, linearize=True)
+    pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True)
     assert os.path.isfile(outputf)