From be313aa7939ddd62116ce5fb9a673965ecd8102f Mon Sep 17 00:00:00 2001 From: mara004 Date: Mon, 26 Aug 2024 13:26:55 +0200 Subject: [PATCH 1/7] convert(): add option to return the engine document handle --- src/img2pdf.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index f89670b..6fc044d 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def tostream(self, outputstream): + def tostream(self, outputstream, return_engine_doc=False): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1267,6 +1267,9 @@ class pdfdoc(object): self.writer.addobj(metadata) self.writer.addobj(iccstream) + if return_engine_doc: + return self.writer, self.output_version + # now write out the PDF if self.engine == Engine.pikepdf: kwargs = {} @@ -2634,6 +2637,7 @@ def convert(*images, **kwargs): viewer_center_window=False, viewer_fullscreen=False, outputstream=None, + return_engine_doc=False, first_frame_only=False, allow_oversized=True, cropborder=None, @@ -2800,6 +2804,9 @@ def convert(*images, **kwargs): pdf.tostream(kwargs["outputstream"]) return + if kwargs["return_engine_doc"]: + return pdf.tostream(None, return_engine_doc=True) + return pdf.tostring() -- 2.39.5 From 8d931f9570220b87db53ca3ccdf13bac181e53f7 Mon Sep 17 00:00:00 2001 From: mara004 Date: Mon, 26 Aug 2024 14:40:29 +0200 Subject: [PATCH 2/7] Add simple test --- src/img2pdf_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index 5d9ce85..3cfdebf 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7146,6 +7146,17 @@ def test_general(general_input, engine): pass +def test_return_engine_doc(): + inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") + outputf = os.path.join( + os.path.dirname(__file__), "tests", "output", 
"engine_normal.jpg.pdf" + ) + pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf) + assert isinstance(pdf, pikepdf.Pdf) + pdf.save(outputf, min_version=min_version, linearize=True) + assert os.path.isfile(outputf) + + def main(): normal16 = alpha_value()[:, :, 0:3] pathlib.Path("test.icc").write_bytes(icc_profile()) -- 2.39.5 From d4b49c510afbb8a9938ebb9ab977d280a204c814 Mon Sep 17 00:00:00 2001 From: mara004 Date: Mon, 26 Aug 2024 14:44:34 +0200 Subject: [PATCH 3/7] Minor cleanup --- src/img2pdf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index 6fc044d..5e5a640 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def tostream(self, outputstream, return_engine_doc=False): + def tostream(self, outputstream=None, return_engine_doc=False): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1271,6 +1271,8 @@ class pdfdoc(object): return self.writer, self.output_version # now write out the PDF + if outputstream is None: + raise TypeError("pdfdoc.tostream() requires outputstream unless return_engine_doc is True.") if self.engine == Engine.pikepdf: kwargs = {} if pikepdf.__version__ >= "6.2.0": @@ -2801,11 +2803,11 @@ def convert(*images, **kwargs): ) if kwargs["outputstream"]: - pdf.tostream(kwargs["outputstream"]) + pdf.tostream(outputstream=kwargs["outputstream"]) return if kwargs["return_engine_doc"]: - return pdf.tostream(None, return_engine_doc=True) + return pdf.tostream(return_engine_doc=True) return pdf.tostring() -- 2.39.5 From 316629fa40d0741f1d619b5efff3770a851322d5 Mon Sep 17 00:00:00 2001 From: mara004 Date: Mon, 26 Aug 2024 15:04:52 +0200 Subject: [PATCH 4/7] Write to tmp path rather than output/ output seems to be part of the git repository and used for comparisons --- src/img2pdf_test.py | 6 ++---- 1 file changed, 2 
insertions(+), 4 deletions(-) diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index 3cfdebf..a44cfe7 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7146,11 +7146,9 @@ def test_general(general_input, engine): pass -def test_return_engine_doc(): +def test_return_engine_doc(tmp_path_factory): inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") - outputf = os.path.join( - os.path.dirname(__file__), "tests", "output", "engine_normal.jpg.pdf" - ) + outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf" pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf) assert isinstance(pdf, pikepdf.Pdf) pdf.save(outputf, min_version=min_version, linearize=True) -- 2.39.5 From c1c77a7453dd790d91b72db178793755b64c31d4 Mon Sep 17 00:00:00 2001 From: mara004 Date: Tue, 27 Aug 2024 22:42:39 +0200 Subject: [PATCH 5/7] Extract a separate function from `tostream()` --- src/img2pdf.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index 5e5a640..f27e0f5 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def tostream(self, outputstream=None, return_engine_doc=False): + def todoc(self): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1267,12 +1267,11 @@ class pdfdoc(object): self.writer.addobj(metadata) self.writer.addobj(iccstream) - if return_engine_doc: - return self.writer, self.output_version + return self.writer, self.output_version - # now write out the PDF - if outputstream is None: - raise TypeError("pdfdoc.tostream() requires outputstream unless return_engine_doc is True.") + def tostream(self, outputstream): + # write out the PDF + self.todoc() # finalize self.writer if self.engine == Engine.pikepdf: kwargs = {} if pikepdf.__version__ >= "6.2.0": @@ -1281,6 +1280,7 @@ 
class pdfdoc(object): outputstream, min_version=self.output_version, linearize=True, **kwargs ) elif self.engine == Engine.pdfrw: + from pdfrw import PdfName, PdfArray self.writer.trailer.Info = self.writer.docinfo # setting the version attribute of the pdfrw PdfWriter object will # influence the behaviour of the write() function @@ -2803,11 +2803,11 @@ def convert(*images, **kwargs): ) if kwargs["outputstream"]: - pdf.tostream(outputstream=kwargs["outputstream"]) + pdf.tostream(kwargs["outputstream"]) return if kwargs["return_engine_doc"]: - return pdf.tostream(return_engine_doc=True) + return pdf.todoc() return pdf.tostring() -- 2.39.5 From 7afbdd892524f68c48863b7cc222f447dd60aa59 Mon Sep 17 00:00:00 2001 From: mara004 Date: Wed, 28 Aug 2024 00:32:10 +0200 Subject: [PATCH 6/7] Factor out from convert() too, adapt pdfdoc --- src/img2pdf.py | 42 ++++++++++++++++++++++-------------------- src/img2pdf_test.py | 5 +++-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index f27e0f5..5379089 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def todoc(self): + def finalize(self): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1267,11 +1267,9 @@ class pdfdoc(object): self.writer.addobj(metadata) self.writer.addobj(iccstream) - return self.writer, self.output_version - def tostream(self, outputstream): # write out the PDF - self.todoc() # finalize self.writer + # this assumes that finalize() has been invoked beforehand by the caller if self.engine == Engine.pikepdf: kwargs = {} if pikepdf.__version__ >= "6.2.0": @@ -2610,14 +2608,11 @@ def find_scale(pagewidth, pageheight): return 10 ** ceil(log10(oversized)) -# given one or more input image, depending on outputstream, either return a -# string containing the whole PDF if outputstream is None or write the PDF -# data to the given file-like 
object and return None -# -# Input images can be given as file like objects (they must implement read()), -# as a binary string representing the image content or as filenames to the -# images. -def convert(*images, **kwargs): +# Convert the image(s) to a `pdfdoc` object. +# The `.writer` attribute holds the underlying engine document handle, and +# `.output_version` the minimum version the caller should use when saving. +# The main convert() wraps this implementation function. +def convert_to_docobject(*images, **kwargs): _default_kwargs = dict( engine=None, title=None, @@ -2638,8 +2633,6 @@ def convert(*images, **kwargs): viewer_fit_window=False, viewer_center_window=False, viewer_fullscreen=False, - outputstream=None, - return_engine_doc=False, first_frame_only=False, allow_oversized=True, cropborder=None, @@ -2802,13 +2795,22 @@ def convert(*images, **kwargs): iccp, ) - if kwargs["outputstream"]: - pdf.tostream(kwargs["outputstream"]) + pdf.finalize() + return pdf + + +# given one or more input images, depending on outputstream, either return a +# string containing the whole PDF if outputstream is None or write the PDF +# data to the given file-like object and return None +# +# Input images can be given as file-like objects (they must implement read()), +# as a binary string representing the image content or as filenames to the +# images. 
+def convert(*images, outputstream=None, **kwargs): + pdf = convert_to_docobject(*images, **kwargs) + if outputstream: + pdf.tostream(outputstream) return - - if kwargs["return_engine_doc"]: - return pdf.todoc() - return pdf.tostring() diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index a44cfe7..f7e80f9 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7149,9 +7149,10 @@ def test_general(general_input, engine): def test_return_engine_doc(tmp_path_factory): inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf" - pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf) + pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf) + pdf = pdf.writer assert isinstance(pdf, pikepdf.Pdf) - pdf.save(outputf, min_version=min_version, linearize=True) + pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True) assert os.path.isfile(outputf) -- 2.39.5 From fd27129b102a640c2c0a0be5b33ace56e8f03b16 Mon Sep 17 00:00:00 2001 From: mara004 Date: Wed, 28 Aug 2024 00:38:50 +0200 Subject: [PATCH 7/7] Fix typo in test --- src/img2pdf_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index f7e80f9..493ff7b 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7150,7 +7150,7 @@ def test_return_engine_doc(tmp_path_factory): inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf" pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf) - pdf = pdf.writer + pdf = pdf_wrapper.writer assert isinstance(pdf, pikepdf.Pdf) pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True) assert os.path.isfile(outputf) -- 2.39.5