diff --git a/src/img2pdf.py b/src/img2pdf.py index f89670b..5379089 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1075,7 +1075,7 @@ class pdfdoc(object): self.tostream(stream) return stream.getvalue() - def tostream(self, outputstream): + def finalize(self): if self.engine == Engine.pikepdf: PdfArray = pikepdf.Array PdfDict = pikepdf.Dictionary @@ -1267,7 +1267,9 @@ class pdfdoc(object): self.writer.addobj(metadata) self.writer.addobj(iccstream) - # now write out the PDF + def tostream(self, outputstream): + # write out the PDF + # this assumes that finalize() has been invoked beforehand by the caller if self.engine == Engine.pikepdf: kwargs = {} if pikepdf.__version__ >= "6.2.0": @@ -1276,6 +1278,7 @@ class pdfdoc(object): outputstream, min_version=self.output_version, linearize=True, **kwargs ) elif self.engine == Engine.pdfrw: + from pdfrw import PdfName, PdfArray self.writer.trailer.Info = self.writer.docinfo # setting the version attribute of the pdfrw PdfWriter object will # influence the behaviour of the write() function @@ -2605,14 +2608,11 @@ def find_scale(pagewidth, pageheight): return 10 ** ceil(log10(oversized)) -# given one or more input image, depending on outputstream, either return a -# string containing the whole PDF if outputstream is None or write the PDF -# data to the given file-like object and return None -# -# Input images can be given as file like objects (they must implement read()), -# as a binary string representing the image content or as filenames to the -# images. -def convert(*images, **kwargs): +# Convert the image(s) to a `pdfdoc` object. +# The `.writer` attribute holds the underlying engine document handle, and +# `.output_version` the minimum version the caller should use when saving. +# The main convert() wraps this implementation function. 
+def convert_to_docobject(*images, **kwargs): _default_kwargs = dict( engine=None, title=None, @@ -2633,7 +2633,6 @@ def convert(*images, **kwargs): viewer_fit_window=False, viewer_center_window=False, viewer_fullscreen=False, - outputstream=None, first_frame_only=False, allow_oversized=True, cropborder=None, @@ -2796,10 +2795,22 @@ def convert(*images, **kwargs): iccp, ) - if kwargs["outputstream"]: - pdf.tostream(kwargs["outputstream"]) - return + pdf.finalize() + return pdf + +# given one or more input images, depending on outputstream, either return a +# string containing the whole PDF if outputstream is None or write the PDF +# data to the given file-like object and return None +# +# Input images can be given as file-like objects (they must implement read()), +# as a binary string representing the image content or as filenames to the +# images. +def convert(*images, outputstream=None, **kwargs): + pdf = convert_to_docobject(*images, **kwargs) + if outputstream: + pdf.tostream(outputstream) + return return pdf.tostring() diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py index 5d9ce85..493ff7b 100755 --- a/src/img2pdf_test.py +++ b/src/img2pdf_test.py @@ -7146,6 +7146,16 @@ def test_general(general_input, engine): pass +def test_return_engine_doc(tmp_path_factory): + inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg") + outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf" + pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf) + pdf = pdf_wrapper.writer + assert isinstance(pdf, pikepdf.Pdf) + pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True) + assert os.path.isfile(outputf) + + def main(): normal16 = alpha_value()[:, :, 0:3] pathlib.Path("test.icc").write_bytes(icc_profile())