From c76f1344a525c009bbdf3747c1595fd78c7a367e Mon Sep 17 00:00:00 2001
From: "Ryan C. Thompson" <rct@thompsonclan.org>
Date: Wed, 5 Nov 2014 23:46:47 -0800
Subject: [PATCH 1/3] Avoid leaking file descriptors

This change prevents img2pdf from opening *all* input files at once,
which means it now works with thousands of input files.
---
 src/img2pdf.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index ae6a9fd..16745c7 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -204,7 +204,9 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N
     pdf = pdfdoc(3, title, author, creator, producer, creationdate,
                  moddate, subject, keywords)
 
-    for im in images:
+    for imfilename in images:
+        debug_out("Reading %s"%imfilename, verbose)
+        im = open(imfilename, "rb")
         rawdata = im.read()
         im.seek(0)
         try:
@@ -295,7 +297,7 @@ def valid_date(string):
 parser = argparse.ArgumentParser(
     description='Lossless conversion/embedding of images (in)to pdf')
 parser.add_argument(
-    'images', metavar='infile', type=argparse.FileType('rb'),
+    'images', metavar='infile', type=str,
     nargs='+', help='input file(s)')
 parser.add_argument(
     '-o', '--output', metavar='out', type=argparse.FileType('wb'),

From b726afbb5ad07144df381cdd859b4c8f7010c472 Mon Sep 17 00:00:00 2001
From: "Ryan C. Thompson" <rct@thompsonclan.org>
Date: Wed, 5 Nov 2014 23:47:42 -0800
Subject: [PATCH 2/3] Convert unrecognized colorspaces to RGB

Instead of crashing on an unrecognized colorspace, we now do
imgdata.convert('RGB').
---
 src/img2pdf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index 16745c7..24e92a3 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -265,8 +265,15 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N
         else:
             # because we do not support /CCITTFaxDecode
             if color == '1':
+                debug_out("Converting colorspace 1 to L", verbose)
                 imgdata = imgdata.convert('L')
                 color = 'L'
+            elif color in ("RGB", "L"):
+                debug_out("Colorspace is OK: %s"%color, verbose)
+            else:
+                debug_out("Converting colorspace %s to RGB"%color, verbose)
+                imgdata = imgdata.convert('RGB')
+                color = imgdata.mode
             imgdata = zlib.compress(imgdata.tostring())
 
         # pdf units = 1/72 inch

From d09cd0f1973724f1a1eb6656176762398819a539 Mon Sep 17 00:00:00 2001
From: "Ryan C. Thompson" <rct@thompsonclan.org>
Date: Thu, 6 Nov 2014 00:53:16 -0800
Subject: [PATCH 3/3] Use "with" to open and close input files

---
 src/img2pdf.py | 82 ++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 42 deletions(-)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index 24e92a3..fd845af 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -206,51 +206,51 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N
 
     for imfilename in images:
         debug_out("Reading %s"%imfilename, verbose)
-        im = open(imfilename, "rb")
-        rawdata = im.read()
-        im.seek(0)
-        try:
-            imgdata = Image.open(im)
-        except IOError as e:
-            # test if it is a jpeg2000 image
-            if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
-                error_out("cannot read input image (not jpeg2000)")
-                error_out("PIL: %s"%e)
-                exit(1)
-            # image is jpeg2000
-            width, height, ics = parsejp2(rawdata)
-            imgformat = "JPEG2000"
+        with open(imfilename, "rb") as im:
+            rawdata = im.read()
+            im.seek(0)
+            try:
+                imgdata = Image.open(im)
+            except IOError as e:
+                # test if it is a jpeg2000 image
+                if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+                    error_out("cannot read input image (not jpeg2000)")
+                    error_out("PIL: %s"%e)
+                    exit(1)
+                # image is jpeg2000
+                width, height, ics = parsejp2(rawdata)
+                imgformat = "JPEG2000"
 
-            if dpi:
-                ndpi = dpi, dpi
-                debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
-            else:
-                ndpi = (96, 96) # TODO: read real dpi
-                debug_out("input dpi = %d x %d"%ndpi, verbose)
+                if dpi:
+                    ndpi = dpi, dpi
+                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
+                else:
+                    ndpi = (96, 96) # TODO: read real dpi
+                    debug_out("input dpi = %d x %d"%ndpi, verbose)
 
-            if colorspace:
-                color = colorspace
-                debug_out("input colorspace (forced) = %s"%(ics))
+                if colorspace:
+                    color = colorspace
+                    debug_out("input colorspace (forced) = %s"%(ics))
+                else:
+                    color = ics
+                    debug_out("input colorspace = %s"%(ics), verbose)
             else:
-                color = ics
-                debug_out("input colorspace = %s"%(ics), verbose)
-        else:
-            width, height = imgdata.size
-            imgformat = imgdata.format
+                width, height = imgdata.size
+                imgformat = imgdata.format
 
-            if dpi:
-                ndpi = dpi, dpi
-                debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
-            else:
-                ndpi = imgdata.info.get("dpi", (96, 96))
-                debug_out("input dpi = %d x %d"%ndpi, verbose)
+                if dpi:
+                    ndpi = dpi, dpi
+                    debug_out("input dpi (forced) = %d x %d"%ndpi, verbose)
+                else:
+                    ndpi = imgdata.info.get("dpi", (96, 96))
+                    debug_out("input dpi = %d x %d"%ndpi, verbose)
 
-            if colorspace:
-                color = colorspace
-                debug_out("input colorspace (forced) = %s"%(color), verbose)
-            else:
-                color = imgdata.mode
-                debug_out("input colorspace = %s"%(color), verbose)
+                if colorspace:
+                    color = colorspace
+                    debug_out("input colorspace (forced) = %s"%(color), verbose)
+                else:
+                    color = imgdata.mode
+                    debug_out("input colorspace = %s"%(color), verbose)
 
         debug_out("width x height = %d x %d"%(width,height), verbose)
         debug_out("imgformat = %s"%imgformat, verbose)
@@ -286,8 +286,6 @@ def convert(images, dpi, x, y, title=None, author=None, creator=None, producer=N
 
         pdf.addimage(color, width, height, imgformat, imgdata, pdf_x, pdf_y)
 
-        im.close()
-
     return pdf.tostring()