allow writing the pdf to a file-like object instead of storing everything in memory

This commit is contained in:
James R. Barlow 2016-02-13 08:51:20 +01:00 committed by Johannes 'josch' Schauer
parent 31a2ce231f
commit 11affb6264

View file

@ -177,7 +177,7 @@ class MyPdfWriter():
obj.identifier = newid obj.identifier = newid
self.objects.append(obj) self.objects.append(obj)
def getstring(self, info): def tostream(self, info, stream):
xreftable = list() xreftable = list()
# justification of the random binary garbage in the header from # justification of the random binary garbage in the header from
@ -194,8 +194,9 @@ class MyPdfWriter():
# #
# the choice of binary characters is arbitrary but those four seem to # the choice of binary characters is arbitrary but those four seem to
# be used elsewhere. # be used elsewhere.
result = ('%%PDF-%s\n' % self.version).encode('ascii') pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii')
result += b'%\xe2\xe3\xcf\xd3\n' pdfheader += b'%\xe2\xe3\xcf\xd3\n'
stream.write(pdfheader)
# From section 3.4.3 of the PDF Reference (version 1.7): # From section 3.4.3 of the PDF Reference (version 1.7):
# #
@ -219,23 +220,26 @@ class MyPdfWriter():
# #
# Since we chose to use a single character eol marker, we preceed it by # Since we chose to use a single character eol marker, we preceed it by
# a space # a space
pos = len(pdfheader)
xreftable.append(b"0000000000 65535 f \n") xreftable.append(b"0000000000 65535 f \n")
for o in self.objects: for o in self.objects:
xreftable.append(("%010d 00000 n \n" % len(result)).encode()) xreftable.append(("%010d 00000 n \n" % pos).encode())
result += o.tostring() content = o.tostring()
stream.write(content)
pos += len(content)
xrefoffset = len(result) xrefoffset = pos
result += b"xref\n" stream.write(b"xref\n")
result += ("0 %d\n" % len(xreftable)).encode() stream.write(("0 %d\n" % len(xreftable)).encode())
for x in xreftable: for x in xreftable:
result += x stream.write(x)
result += b"trailer\n" stream.write(b"trailer\n")
result += parse({b"/Size": len(xreftable), b"/Info": info, stream.write(parse({b"/Size": len(xreftable), b"/Info": info,
b"/Root": self.catalog})+b"\n" b"/Root": self.catalog})+b"\n")
result += b"startxref\n" stream.write(b"startxref\n")
result += ("%d\n" % xrefoffset).encode() stream.write(("%d\n" % xrefoffset).encode())
result += b"%%EOF\n" stream.write(b"%%EOF\n")
return result return
def addpage(self, page): def addpage(self, page):
page[b"/Parent"] = self.pages page[b"/Parent"] = self.pages
@ -393,6 +397,11 @@ class pdfdoc(object):
self.writer.addobj(image) self.writer.addobj(image)
def tostring(self): def tostring(self):
stream = BytesIO()
self.tostream(stream)
return stream.getvalue()
def tostream(self, outputstream):
if self.with_pdfrw: if self.with_pdfrw:
from pdfrw import PdfDict, PdfName, PdfArray, PdfObject from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
else: else:
@ -509,12 +518,10 @@ class pdfdoc(object):
# now write out the PDF # now write out the PDF
if self.with_pdfrw: if self.with_pdfrw:
outf = BytesIO()
self.writer.trailer.Info = self.info self.writer.trailer.Info = self.info
self.writer.write(outf) self.writer.write(outputstream)
return outf.getvalue()
else: else:
return self.writer.getstring(self.info) self.writer.tostream(self.info, outputstream)
def read_image(rawdata, colorspace): def read_image(rawdata, colorspace):
@ -834,6 +841,9 @@ def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
return pagewidth, pageheight, imgwidthpdf, imgheightpdf return pagewidth, pageheight, imgwidthpdf, imgheightpdf
# given one or more input image, depending on outputstream, either return a
# string containing the whole PDF if outputstream is None or write the PDF
# data to the given file-like object and return None
def convert(*images, title=None, def convert(*images, title=None,
author=None, creator=None, producer=None, creationdate=None, author=None, creator=None, producer=None, creationdate=None,
moddate=None, subject=None, keywords=None, colorspace=None, moddate=None, subject=None, keywords=None, colorspace=None,
@ -841,7 +851,7 @@ def convert(*images, title=None,
viewer_initial_page=None, viewer_magnification=None, viewer_initial_page=None, viewer_magnification=None,
viewer_page_layout=None, viewer_fit_window=False, viewer_page_layout=None, viewer_fit_window=False,
viewer_center_window=False, viewer_fullscreen=False, viewer_center_window=False, viewer_fullscreen=False,
with_pdfrw=True): with_pdfrw=True, outputstream=None):
pdf = pdfdoc("1.3", title, author, creator, producer, creationdate, pdf = pdfdoc("1.3", title, author, creator, producer, creationdate,
moddate, subject, keywords, nodate, viewer_panes, moddate, subject, keywords, nodate, viewer_panes,
@ -867,6 +877,10 @@ def convert(*images, title=None,
imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, imgwidthpdf, imgheightpdf, imgxpdf, imgypdf,
pagewidth, pageheight) pagewidth, pageheight)
if outputstream:
pdf.tostream(outputstream)
return
return pdf.tostring() return pdf.tostring()
@ -1497,21 +1511,20 @@ values set via the --border option.
parser.prog) parser.prog)
exit(2) exit(2)
args.output.write( convert(
convert( *args.images, title=args.title, author=args.author,
*args.images, title=args.title, author=args.author, creator=args.creator, producer=args.producer,
creator=args.creator, producer=args.producer, creationdate=args.creationdate, moddate=args.moddate,
creationdate=args.creationdate, moddate=args.moddate, subject=args.subject, keywords=args.keywords,
subject=args.subject, keywords=args.keywords, colorspace=args.colorspace, nodate=args.nodate, layout_fun=layout_fun,
colorspace=args.colorspace, nodate=args.nodate, viewer_panes=args.viewer_panes,
layout_fun=layout_fun, viewer_panes=args.viewer_panes, viewer_initial_page=args.viewer_initial_page,
viewer_initial_page=args.viewer_initial_page, viewer_magnification=args.viewer_magnification,
viewer_magnification=args.viewer_magnification, viewer_page_layout=args.viewer_page_layout,
viewer_page_layout=args.viewer_page_layout, viewer_fit_window=args.viewer_fit_window,
viewer_fit_window=args.viewer_fit_window, viewer_center_window=args.viewer_center_window,
viewer_center_window=args.viewer_center_window, viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not
viewer_fullscreen=args.viewer_fullscreen, args.without_pdfrw, outputstream=args.output)
with_pdfrw=not args.without_pdfrw))
if __name__ == '__main__': if __name__ == '__main__':
main() main()