Write to stream instead of building large bytestrings in memory

This commit is contained in:
James R. Barlow 2016-01-18 10:53:46 -08:00
parent 8951d7dd05
commit e9bcce0afc

View file

@@ -202,34 +202,39 @@ class pdfdoc(object):
self.addobj(image) self.addobj(image)
def tostring(self): def tostring(self):
stream = cStringIO()
self.tostream(stream)
return stream.getvalue()
def tostream(self, stream):
# add info as last object # add info as last object
self.addobj(self.info) self.addobj(self.info)
xreftable = list() xreftable = list()
result = ("%%PDF-1.%d\n"%self.version).encode() stream.write(("%%PDF-1.%d\n"%self.version).encode())
xreftable.append(b"0000000000 65535 f \n") xreftable.append(b"0000000000 65535 f \n")
for o in self.objects: for o in self.objects:
xreftable.append(("%010d 00000 n \n"%len(result)).encode()) xreftable.append(("%010d 00000 n \n"%stream.tell()).encode())
result += o.tostring() stream.write(o.tostring())
xrefoffset = len(result) xrefoffset = stream.tell()
result += b"xref\n" stream.write(b"xref\n")
result += ("0 %d\n"%len(xreftable)).encode() stream.write(("0 %d\n"%len(xreftable)).encode())
for x in xreftable: for x in xreftable:
result += x stream.write(x)
result += b"trailer\n" stream.write(b"trailer\n")
result += parse({b"/Size": len(xreftable), b"/Info": self.info, b"/Root": self.catalog})+b"\n" stream.write(parse({b"/Size": len(xreftable), b"/Info": self.info, b"/Root": self.catalog})+b"\n")
result += b"startxref\n" stream.write(b"startxref\n")
result += ("%d\n"%xrefoffset).encode() stream.write(("%d\n"%xrefoffset).encode())
result += b"%%EOF\n" stream.write(b"%%EOF\n")
return result
def convert(images, dpi=None, pagesize=(None, None, None), title=None, def convert(images, dpi=None, pagesize=(None, None, None), title=None,
author=None, creator=None, producer=None, creationdate=None, author=None, creator=None, producer=None, creationdate=None,
moddate=None, subject=None, keywords=None, colorspace=None, moddate=None, subject=None, keywords=None, colorspace=None,
nodate=False, verbose=False): nodate=False, verbose=False, outputstream=None):
pagesize_options = pagesize[2] pagesize_options = pagesize[2]
@@ -344,6 +349,10 @@ def convert(images, dpi=None, pagesize=(None, None, None), title=None,
pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y) pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)
if outputstream:
pdf.tostream(outputstream)
return outputstream
return pdf.tostring() return pdf.tostring()
def get_ndpi(width, height, pagesize): def get_ndpi(width, height, pagesize):
@@ -644,12 +653,11 @@ def main(args=None):
args = sys.argv[1:] args = sys.argv[1:]
args = parser.parse_args(args) args = parser.parse_args(args)
args.output.write(
convert( convert(
args.images, args.dpi, args.pagesize, args.title, args.author, args.images, args.dpi, args.pagesize, args.title, args.author,
args.creator, args.producer, args.creationdate, args.moddate, args.creator, args.producer, args.creationdate, args.moddate,
args.subject, args.keywords, args.colorspace, args.nodate, args.subject, args.keywords, args.colorspace, args.nodate,
args.verbose)) args.verbose, outputstream=args.output)
if __name__ == '__main__': if __name__ == '__main__':
main() main()