break out convert_to_docobject(), which returns a document handle, from convert()

This commit is contained in:
mara004 2024-08-26 13:26:55 +02:00 committed by Johannes Schauer Marin Rodrigues
parent 5e515abb6f
commit 2d5e4e3cb7
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1
2 changed files with 35 additions and 14 deletions

View file

@ -1080,7 +1080,7 @@ class pdfdoc(object):
self.tostream(stream)
return stream.getvalue()
def tostream(self, outputstream):
def finalize(self):
if self.engine == Engine.pikepdf:
PdfArray = pikepdf.Array
PdfDict = pikepdf.Dictionary
@ -1272,7 +1272,9 @@ class pdfdoc(object):
self.writer.addobj(metadata)
self.writer.addobj(iccstream)
# now write out the PDF
def tostream(self, outputstream):
# write out the PDF
# this assumes that finalize() has been invoked beforehand by the caller
if self.engine == Engine.pikepdf:
kwargs = {}
if pikepdf.__version__ >= "6.2.0":
@ -1281,6 +1283,7 @@ class pdfdoc(object):
outputstream, min_version=self.output_version, linearize=True, **kwargs
)
elif self.engine == Engine.pdfrw:
from pdfrw import PdfName, PdfArray
self.writer.trailer.Info = self.writer.docinfo
# setting the version attribute of the pdfrw PdfWriter object will
# influence the behaviour of the write() function
@ -2687,14 +2690,11 @@ def find_scale(pagewidth, pageheight):
return 10 ** ceil(log10(oversized))
# given one or more input images, depending on outputstream, either return a
# string containing the whole PDF if outputstream is None or write the PDF
# data to the given file-like object and return None
#
# Input images can be given as file like objects (they must implement read()),
# as a binary string representing the image content or as filenames to the
# images.
def convert(*images, **kwargs):
# Convert the image(s) to a `pdfdoc` object.
# The `.writer` attribute holds the underlying engine document handle, and
# `.output_version` the minimum version the caller should use when saving.
# The main convert() wraps this implementation function.
def convert_to_docobject(*images, **kwargs):
_default_kwargs = dict(
engine=None,
title=None,
@ -2715,7 +2715,6 @@ def convert(*images, **kwargs):
viewer_fit_window=False,
viewer_center_window=False,
viewer_fullscreen=False,
outputstream=None,
first_frame_only=False,
allow_oversized=True,
cropborder=None,
@ -2878,10 +2877,22 @@ def convert(*images, **kwargs):
iccp,
)
if kwargs["outputstream"]:
pdf.tostream(kwargs["outputstream"])
return
pdf.finalize()
return pdf
# Given one or more input images, build the PDF and deliver it in one of two
# ways, depending on outputstream:
#
#  - outputstream is None (the default): return the whole PDF in memory, as
#    produced by the document object's tostring() method
#  - outputstream is a file-like object: write the PDF data to it and
#    return None
#
# Input images can be given as file like objects (they must implement read()),
# as a binary string representing the image content or as filenames to the
# images.
#
# All remaining keyword arguments are forwarded unchanged to
# convert_to_docobject(), which creates the document object.
def convert(*images, outputstream=None, **kwargs):
    pdf = convert_to_docobject(*images, **kwargs)
    # Compare against None instead of relying on truthiness: a valid
    # file-like object may evaluate as false (for example one that defines
    # __len__ and is currently empty) and must still receive the output.
    if outputstream is not None:
        pdf.tostream(outputstream)
        return None

    return pdf.tostring()

View file

@ -7178,6 +7178,16 @@ def test_general(general_input, engine):
pass
def test_return_engine_doc(tmp_path_factory):
    # convert_to_docobject() must expose the underlying pikepdf document via
    # the wrapper's `.writer` attribute, so that the caller can save it
    # directly with the wrapper's `.output_version` as the minimum version.
    source_image = os.path.join(
        os.path.dirname(__file__), "tests", "input", "normal.jpg"
    )
    target_pdf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf"

    wrapper = img2pdf.convert_to_docobject(
        source_image, engine=img2pdf.Engine.pikepdf
    )
    engine_doc = wrapper.writer
    assert isinstance(engine_doc, pikepdf.Pdf)

    # save through the engine handle rather than through img2pdf itself
    engine_doc.save(
        target_pdf, min_version=wrapper.output_version, linearize=True
    )
    assert os.path.isfile(target_pdf)
def main():
normal16 = alpha_value()[:, :, 0:3]
pathlib.Path("test.icc").write_bytes(icc_profile())