From c76f1344a525c009bbdf3747c1595fd78c7a367e Mon Sep 17 00:00:00 2001 From: "Ryan C. Thompson" Date: Wed, 5 Nov 2014 23:46:47 -0800 Subject: [PATCH 1/3] Avoid leaking file descriptors This change prevents img2pdf from opening *all* input files at once, which means it now works with thousands of input files. --- src/img2pdf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index ae6a9fd..16745c7 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -204,7 +204,9 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N pdf = pdfdoc(3, title, author, creator, producer, creationdate, moddate, subject, keywords) - for im in images: + for imfilename in images: + debug_out("Reading %s"%imfilename, verbose) + im = open(imfilename, "rb") rawdata = im.read() im.seek(0) try: @@ -295,7 +297,7 @@ def valid_date(string): parser = argparse.ArgumentParser( description='Lossless conversion/embedding of images (in)to pdf') parser.add_argument( - 'images', metavar='infile', type=argparse.FileType('rb'), + 'images', metavar='infile', type=str, nargs='+', help='input file(s)') parser.add_argument( '-o', '--output', metavar='out', type=argparse.FileType('wb'), From b726afbb5ad07144df381cdd859b4c8f7010c472 Mon Sep 17 00:00:00 2001 From: "Ryan C. Thompson" Date: Wed, 5 Nov 2014 23:47:42 -0800 Subject: [PATCH 2/3] Convert unrecognized colorspaces to RGB Instead of crashing on an unrecognized colorspace, we now do imgdata.convert('RGB'). --- src/img2pdf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/img2pdf.py b/src/img2pdf.py index 16745c7..24e92a3 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -265,8 +265,15 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N else: # because we do not support /CCITTFaxDecode if color == '1': + debug_out("Converting colorspace 1 to L", verbose) imgdata = imgdata.convert('L') color = 'L' + elif color in ("RGB", "L"): + debug_out("Colorspace is OK: %s"%color, verbose) + else: + debug_out("Converting colorspace %s to RGB"%color, verbose) + imgdata = imgdata.convert('RGB') + color = imgdata.mode imgdata = zlib.compress(imgdata.tostring()) # pdf units = 1/72 inch From d09cd0f1973724f1a1eb6656176762398819a539 Mon Sep 17 00:00:00 2001 From: "Ryan C. Thompson" Date: Thu, 6 Nov 2014 00:53:16 -0800 Subject: [PATCH 3/3] Use "with" to open and close input files --- src/img2pdf.py | 82 ++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/src/img2pdf.py b/src/img2pdf.py index 24e92a3..fd845af 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -206,51 +206,51 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N for imfilename in images: debug_out("Reading %s"%imfilename, verbose) - im = open(imfilename, "rb") - rawdata = im.read() - im.seek(0) - try: - imgdata = Image.open(im) - except IOError as e: - # test if it is a jpeg2000 image - if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": - error_out("cannot read input image (not jpeg2000)") - error_out("PIL: %s"%e) - exit(1) - # image is jpeg2000 - width, height, ics = parsejp2(rawdata) - imgformat = "JPEG2000" + with open(imfilename, "rb") as im: + rawdata = im.read() + im.seek(0) + try: + imgdata = Image.open(im) + except IOError as e: + # test if it is a jpeg2000 image + if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": + error_out("cannot read input image (not jpeg2000)") + error_out("PIL: %s"%e) + exit(1) + # image is jpeg2000 + width, height, ics = parsejp2(rawdata) + imgformat = "JPEG2000" - if dpi: - ndpi = dpi, dpi - debug_out("input dpi (forced) = %d x %d"%ndpi, verbose) - else: - ndpi = (96, 96) # TODO: read real dpi - debug_out("input dpi = %d x %d"%ndpi, verbose) + if dpi: + ndpi = dpi, dpi + debug_out("input dpi (forced) = %d x %d"%ndpi, verbose) + else: + ndpi = (96, 96) # TODO: read real dpi + debug_out("input dpi = %d x %d"%ndpi, verbose) - if colorspace: - color = colorspace - debug_out("input colorspace (forced) = %s"%(ics)) + if colorspace: + color = colorspace + debug_out("input colorspace (forced) = %s"%(ics)) + else: + color = ics + debug_out("input colorspace = %s"%(ics), verbose) else: - color = ics - debug_out("input colorspace = %s"%(ics), verbose) - else: - width, height = imgdata.size - imgformat = imgdata.format + width, height = imgdata.size + imgformat = imgdata.format - if dpi: - ndpi = dpi, dpi - debug_out("input dpi (forced) = %d x %d"%ndpi, verbose) - else: - ndpi = imgdata.info.get("dpi", (96, 96)) - debug_out("input dpi = %d x %d"%ndpi, verbose) + if dpi: + ndpi = dpi, dpi + debug_out("input dpi (forced) = %d x %d"%ndpi, verbose) + else: + ndpi = imgdata.info.get("dpi", (96, 96)) + debug_out("input dpi = %d x %d"%ndpi, verbose) - if colorspace: - color = colorspace - debug_out("input colorspace (forced) = %s"%(color), verbose) - else: - color = imgdata.mode - debug_out("input colorspace = %s"%(color), verbose) + if colorspace: + color = colorspace + debug_out("input colorspace (forced) = %s"%(color), verbose) + else: + color = imgdata.mode + debug_out("input colorspace = %s"%(color), verbose) debug_out("width x height = %d x %d"%(width,height), verbose) debug_out("imgformat = %s"%imgformat, verbose) @@ -286,8 +286,6 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y) - im.close() - return pdf.tostring()