convert(): add option to get engine document (e.g. pikepdf.Pdf) #203
|
@ -1075,7 +1075,7 @@ class pdfdoc(object):
|
||||||
self.tostream(stream)
|
self.tostream(stream)
|
||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
def tostream(self, outputstream):
|
def finalize(self):
|
||||||
|
|||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
PdfArray = pikepdf.Array
|
PdfArray = pikepdf.Array
|
||||||
PdfDict = pikepdf.Dictionary
|
PdfDict = pikepdf.Dictionary
|
||||||
|
@ -1267,7 +1267,9 @@ class pdfdoc(object):
|
||||||
self.writer.addobj(metadata)
|
self.writer.addobj(metadata)
|
||||||
self.writer.addobj(iccstream)
|
self.writer.addobj(iccstream)
|
||||||
|
|
||||||
# now write out the PDF
|
def tostream(self, outputstream):
|
||||||
|
# write out the PDF
|
||||||
|
# this assumes that finalize() has been invoked beforehand by the caller
|
||||||
if self.engine == Engine.pikepdf:
|
if self.engine == Engine.pikepdf:
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if pikepdf.__version__ >= "6.2.0":
|
if pikepdf.__version__ >= "6.2.0":
|
||||||
|
@ -1276,6 +1278,7 @@ class pdfdoc(object):
|
||||||
outputstream, min_version=self.output_version, linearize=True, **kwargs
|
outputstream, min_version=self.output_version, linearize=True, **kwargs
|
||||||
)
|
)
|
||||||
elif self.engine == Engine.pdfrw:
|
elif self.engine == Engine.pdfrw:
|
||||||
|
from pdfrw import PdfName, PdfArray
|
||||||
self.writer.trailer.Info = self.writer.docinfo
|
self.writer.trailer.Info = self.writer.docinfo
|
||||||
# setting the version attribute of the pdfrw PdfWriter object will
|
# setting the version attribute of the pdfrw PdfWriter object will
|
||||||
# influence the behaviour of the write() function
|
# influence the behaviour of the write() function
|
||||||
|
@ -2605,14 +2608,11 @@ def find_scale(pagewidth, pageheight):
|
||||||
return 10 ** ceil(log10(oversized))
|
return 10 ** ceil(log10(oversized))
|
||||||
|
|
||||||
|
|
||||||
# given one or more input image, depending on outputstream, either return a
|
# Convert the image(s) to a `pdfdoc` object.
|
||||||
# string containing the whole PDF if outputstream is None or write the PDF
|
# The `.writer` attribute holds the underlying engine document handle, and
|
||||||
# data to the given file-like object and return None
|
# `.output_version` the minimum version the caller should use when saving.
|
||||||
#
|
# The main convert() wraps this implementation function.
|
||||||
# Input images can be given as file like objects (they must implement read()),
|
def convert_to_docobject(*images, **kwargs):
|
||||||
# as a binary string representing the image content or as filenames to the
|
|
||||||
# images.
|
|
||||||
def convert(*images, **kwargs):
|
|
||||||
_default_kwargs = dict(
|
_default_kwargs = dict(
|
||||||
engine=None,
|
engine=None,
|
||||||
title=None,
|
title=None,
|
||||||
|
@ -2633,7 +2633,6 @@ def convert(*images, **kwargs):
|
||||||
viewer_fit_window=False,
|
viewer_fit_window=False,
|
||||||
viewer_center_window=False,
|
viewer_center_window=False,
|
||||||
viewer_fullscreen=False,
|
viewer_fullscreen=False,
|
||||||
outputstream=None,
|
|
||||||
first_frame_only=False,
|
first_frame_only=False,
|
||||||
allow_oversized=True,
|
allow_oversized=True,
|
||||||
cropborder=None,
|
cropborder=None,
|
||||||
|
@ -2796,10 +2795,22 @@ def convert(*images, **kwargs):
|
||||||
iccp,
|
iccp,
|
||||||
)
|
)
|
||||||
|
|
||||||
if kwargs["outputstream"]:
|
pdf.finalize()
|
||||||
pdf.tostream(kwargs["outputstream"])
|
return pdf
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
|
# given one or more input image, depending on outputstream, either return a
|
||||||
|
# string containing the whole PDF if outputstream is None or write the PDF
|
||||||
|
# data to the given file-like object and return None
|
||||||
|
#
|
||||||
|
# Input images can be given as file like objects (they must implement read()),
|
||||||
|
# as a binary string representing the image content or as filenames to the
|
||||||
|
# images.
|
||||||
|
def convert(*images, outputstream=None, **kwargs):
|
||||||
josch
commented
Please do not change the signature of the convert() function. This is necessary to preserve API stability. You have to extract "outputstream" from kwargs.
mara0004
commented
It's not clear to me why this is supposed to break the API -- can you explain?
AFAICS, `kwargs` is internal, and no callee expects `kwargs["outputstream"]`, right?
mara0004
commented
Relatedly, it looks like the `_default_kwargs` strategy silently ignores nonexistent parameters, which is problematic. (And of course it breaks IDE completion.)
~~I see you're using kwargs to unify access of `{crop,bleed,trim,art}border`, which is fine, but why can't any others params be in the signature directly?~~
mara0004
commented
> It's not clear to me why this is supposed to break the API -- can you explain?
Okay, self-answering the question: an SO search showed that specifying optional params after a `*capture` raises a `SyntaxError` on Python 2. Never having written code for Python 2, I did not know that. I'll change it to extract from kwargs as you said, then.
The issue with invalid kwargs presumably being ignored without error still stands, though.
|
|||||||
|
pdf = convert_to_docobject(*images, **kwargs)
|
||||||
|
if outputstream:
|
||||||
|
pdf.tostream(outputstream)
|
||||||
|
return
|
||||||
return pdf.tostring()
|
return pdf.tostring()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7146,6 +7146,16 @@ def test_general(general_input, engine):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_return_engine_doc(tmp_path_factory):
|
||||||
|
inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg")
|
||||||
|
outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf"
|
||||||
|
pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf)
|
||||||
|
pdf = pdf_wrapper.writer
|
||||||
|
assert isinstance(pdf, pikepdf.Pdf)
|
||||||
|
pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True)
|
||||||
|
assert os.path.isfile(outputf)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
normal16 = alpha_value()[:, :, 0:3]
|
normal16 = alpha_value()[:, :, 0:3]
|
||||||
pathlib.Path("test.icc").write_bytes(icc_profile())
|
pathlib.Path("test.icc").write_bytes(icc_profile())
|
||||||
|
|
You split `tostream()` into `finalize()` and `tostream()`, but then why does the new `tostream()` not call `finalize()`?
Because I thought the embedder of `convert_to_docobject()` should not have to invoke `finalize()`. Instead, `finalize()` technically belongs in `convert_to_docobject()` itself, after all image pages have been added. So `tostream()` cannot also call `finalize()`, as that would result in a double call.