convert(): add option to get engine document (e.g. pikepdf.Pdf) #203
|
@ -1075,7 +1075,7 @@ class pdfdoc(object):
|
|||
self.tostream(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
def todoc(self):
|
||||
def finalize(self):
|
||||
|
||||
if self.engine == Engine.pikepdf:
|
||||
PdfArray = pikepdf.Array
|
||||
PdfDict = pikepdf.Dictionary
|
||||
|
@ -1267,11 +1267,9 @@ class pdfdoc(object):
|
|||
self.writer.addobj(metadata)
|
||||
self.writer.addobj(iccstream)
|
||||
|
||||
return self.writer, self.output_version
|
||||
|
||||
def tostream(self, outputstream):
|
||||
# write out the PDF
|
||||
self.todoc() # finalize self.writer
|
||||
# this assumes that finalize() has been invoked beforehand by the caller
|
||||
if self.engine == Engine.pikepdf:
|
||||
kwargs = {}
|
||||
if pikepdf.__version__ >= "6.2.0":
|
||||
|
@ -2610,14 +2608,11 @@ def find_scale(pagewidth, pageheight):
|
|||
return 10 ** ceil(log10(oversized))
|
||||
|
||||
|
||||
# given one or more input image, depending on outputstream, either return a
|
||||
# string containing the whole PDF if outputstream is None or write the PDF
|
||||
# data to the given file-like object and return None
|
||||
#
|
||||
# Input images can be given as file like objects (they must implement read()),
|
||||
# as a binary string representing the image content or as filenames to the
|
||||
# images.
|
||||
def convert(*images, **kwargs):
|
||||
# Convert the image(s) to a `pdfdoc` object.
|
||||
# The `.writer` attribute holds the underlying engine document handle, and
|
||||
# `.output_version` the minimum version the caller should use when saving.
|
||||
# The main convert() wraps this implementation function.
|
||||
def convert_to_docobject(*images, **kwargs):
|
||||
_default_kwargs = dict(
|
||||
engine=None,
|
||||
title=None,
|
||||
|
@ -2638,8 +2633,6 @@ def convert(*images, **kwargs):
|
|||
viewer_fit_window=False,
|
||||
viewer_center_window=False,
|
||||
viewer_fullscreen=False,
|
||||
outputstream=None,
|
||||
return_engine_doc=False,
|
||||
first_frame_only=False,
|
||||
allow_oversized=True,
|
||||
cropborder=None,
|
||||
|
@ -2802,13 +2795,22 @@ def convert(*images, **kwargs):
|
|||
iccp,
|
||||
)
|
||||
|
||||
if kwargs["outputstream"]:
|
||||
pdf.tostream(kwargs["outputstream"])
|
||||
pdf.finalize()
|
||||
return pdf
|
||||
|
||||
|
||||
# given one or more input image, depending on outputstream, either return a
|
||||
# string containing the whole PDF if outputstream is None or write the PDF
|
||||
# data to the given file-like object and return None
|
||||
#
|
||||
# Input images can be given as file like objects (they must implement read()),
|
||||
# as a binary string representing the image content or as filenames to the
|
||||
# images.
|
||||
def convert(*images, outputstream=None, **kwargs):
|
||||
josch
commented
Please do not change the signature of the convert() function. This is necessary to preserve API stability. You have to extract "outputstream" from kwargs.
mara0004
commented
It's not clear to me why this is supposed to break the API -- can you explain?
AFAICS, `kwargs` is internal, and no callee expects `kwargs["outputstream"]`, right?
mara0004
commented
Relatedly, it looks like the `_default_kwargs` strategy silently ignores nonexistent parameters, which is problematic. (And of course it breaks IDE completion.)
~~I see you're using kwargs to unify access of `{crop,bleed,trim,art}border`, which is fine, but why can't any others params be in the signature directly?~~
mara0004
commented
> It's not clear to me why this is supposed to break the API -- can you explain?
Okay, self-answering the question: a SO search yielded that specifying optional params after a `*capture` raises a `SyntaxError` on Python 2. Never having written code for Python 2, I did not know that. I'll change to extract from kwargs as you said, then.
The issue with invalid kwargs presumably being ignored without error still stands, though.
|
||||
pdf = convert_to_docobject(*images, **kwargs)
|
||||
if outputstream:
|
||||
pdf.tostream(outputstream)
|
||||
return
|
||||
|
||||
if kwargs["return_engine_doc"]:
|
||||
return pdf.todoc()
|
||||
|
||||
return pdf.tostring()
|
||||
|
||||
|
||||
|
|
|
@ -7149,9 +7149,10 @@ def test_general(general_input, engine):
|
|||
def test_return_engine_doc(tmp_path_factory):
|
||||
inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg")
|
||||
outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf"
|
||||
pdf, min_version = img2pdf.convert(inputf, return_engine_doc=True, engine=img2pdf.Engine.pikepdf)
|
||||
pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf)
|
||||
pdf = pdf.writer
|
||||
assert isinstance(pdf, pikepdf.Pdf)
|
||||
pdf.save(outputf, min_version=min_version, linearize=True)
|
||||
pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True)
|
||||
assert os.path.isfile(outputf)
|
||||
|
||||
|
||||
|
|
You split `tostream()` into `finalize()` and `tostream()`, but then why does the new `tostream()` not call `finalize()`?

Because I thought the embedder of `convert_to_docobject()` should not have to invoke `finalize()`. Instead, `finalize()` technically belongs in `convert_to_docobject()` itself, after all image pages have been added. So `tostream()` cannot also call `finalize()`, as that would result in a double call.