From 7afbdd892524f68c48863b7cc222f447dd60aa59 Mon Sep 17 00:00:00 2001 From: mara004 Date: Wed, 28 Aug 2024 00:32:10 +0200 Subject: [PATCH] Factor out from convert() too, adapt pdfdoc --- src/img2pdf.py | 42 ++++++++++++++++++++++-------------------- src/img2pdf_test.py | 5 +++-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index f27e0f5..5379089 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def todoc(self): + def finalize(self): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1267,11 +1267,9 @@ class pdfdoc(object): self.writer.addobj(metadata) self.writer.addobj(iccstream) - return self.writer, self.output_version - def tostream(self, outputstream): # write out the PDF - self.todoc() # finalize self.writer + # this assumes that finalize() has been invoked beforehand by the caller if self.engine == Engine.pikepdf: kwargs = {} if pikepdf.__version__ >= "6.2.0": @@ -2610,14 +2608,11 @@ def find_scale(pagewidth, pageheight): return 10 ** ceil(log10(oversized)) -# given one or more input image, depending on outputstream, either return a -# string containing the whole PDF if outputstream is None or write the PDF -# data to the given file-like object and return None -# -# Input images can be given as file like objects (they must implement read()), -# as a binary string representing the image content or as filenames to the -# images. -def convert(*images, **kwargs): +# Convert the image(s) to a `pdfdoc` object. +# The `.writer` attribute holds the underlying engine document handle, and +# `.output_version` the minimum version the caller should use when saving. +# The main convert() wraps this implementation function. 
+def convert_to_docobject(*images, **kwargs): _default_kwargs = dict( engine=None, title=None, @@ -2638,8 +2633,6 @@ def convert(*images, **kwargs): viewer_fit_window=False, viewer_center_window=False, viewer_fullscreen=False, - outputstream=None, - return_engine_doc=False, first_frame_only=False, allow_oversized=True, cropborder=None, @@ -2802,13 +2795,22 @@ def convert(*images, **kwargs): iccp, ) - if kwargs["outputstream"]: - pdf.tostream(kwargs["outputstream"]) + pdf.finalize() + return pdf + + +# given one or more input image, depending on outputstream, either return a +# string containing the whole PDF if outputstream is None or write the PDF +# data to the given file-like object and return None +# +# Input images can be given as file like objects (they must implement read()), +# as a binary string representing the image content or as filenames to the +# images. +def convert(*images, outputstream=None, **kwargs): + pdf = convert_to_docobject(*images, **kwargs) + if outputstream: + pdf.tostream(outputstream) return - - if kwargs["return_engine_doc"]: - return pdf.todoc() - return pdf.tostring() diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index a44cfe7..f7e80f9 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7149,9 +7149,10 @@ def test_general(general_input, engine): def test_return_engine_doc(tmp_path_factory): inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf" - pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf) + pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf) + pdf = pdf_wrapper.writer assert isinstance(pdf, pikepdf.Pdf) - pdf.save(outputf, min_version=min_version, linearize=True) + pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True) assert os.path.isfile(outputf)