Break out TIFF G4 transcoding into its own function

2016-07-21 14:42:48 -07:00 · 2016-07-21 14:42:48 -07:00 · c27505486e
commit c27505486e
parent 95f84f97bc
1 changed files with 52 additions and 23 deletions
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -609,6 +609,45 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
    return (color, ndpi, imgwidthpx, imgheightpx)


+def transcode_monochrome(imgdata):
+    """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
+
+    from PIL import TiffImagePlugin
+
+    logging.debug("Converting monochrome to CCITT Group4")
+
+    # Convert the image to Group 4 in memory. If libtiff is not installed and
+    # Pillow is not compiled against it, .save() will raise an exception.
+    newimgio = BytesIO()
+    imgdata.save(newimgio, format='TIFF', compression='group4')
+
+    # Open new image in memory
+    newimgio.seek(0)
+    newimg = Image.open(newimgio)
+
+    # If Pillow is passed an invalid compression argument it will ignore it;
+    # make sure the image actually got compressed.
+    if newimg.info['compression'] != 'group4':
+        raise ValueError("Image not compressed as expected")
+
+    # Read the TIFF tags to find the offset(s) of the compressed data strips.
+    strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
+    strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
+    rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP]
+
+    # PIL always seems to create a single strip even for very large TIFFs when
+    # it saves images, so assume we only have to read a single strip.
+    # A test ~10 GPixel image was still encoded as a single strip. Just to be
+    # safe check throw an error if there is more than one offset.
+    if len(strip_offsets) > 1:
+        raise NotImplementedError("Transcoding multiple strips not supported")
+
+    newimgio.seek(strip_offsets[0])
+    ccittdata = newimgio.read(strip_bytes[0])
+
+    return ccittdata
+
+
 def read_images(rawdata, colorspace, first_frame_only=False):
    im = BytesIO(rawdata)
    im.seek(0)
@ -663,30 +702,20 @@ def read_images(rawdata, colorspace, first_frame_only=False):
            color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                    imgdata, imgformat, default_dpi, colorspace)

+            newimg = None
            if color == Colorspace['1']:
-                logging.debug("Converting monochrome to CCITT Group4")
-                # Convert the image to Group 4 in memory
-                newimgio = BytesIO()
-                imgdata.save(newimgio, format='TIFF', compression='group4')
-
-                # Open new image in memory
-                newimgio.seek(0)
-                newimg = Image.open(newimgio)
-
-                # Obtain tags
-                strip_offsets = newimg.tag_v2[273]
-                strip_bytes = newimg.tag_v2[279]
-                rows_per_strip = newimg.tag_v2[278]
-
-                newimgio.seek(strip_offsets[0])
-                ccittdata = newimgio.read(strip_bytes[0])
-
-                logging.debug("Extracted %i bytes from image" % len(ccittdata))
-                imgformat = ImageFormat.CCITTGroup4
-                result.append((color, ndpi, imgformat, ccittdata,
-                    imgwidthpx, imgheightpx))
-                img_page_count += 1
-                continue
+                try:
+                    ccittdata = transcode_monochrome(imgdata)
+                    imgformat = ImageFormat.CCITTGroup4
+                    result.append((color, ndpi, imgformat, ccittdata,
+                        imgwidthpx, imgheightpx))
+                    img_page_count += 1
+                    continue
+                except Exception as e:
+                    logging.debug(e)
+                    logging.debug("Converting colorspace 1 to L")
+                    newimg = imgdata.convert('L')
+                    color = Colorspace.L
            elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
                           Colorspace["CMYK;I"]]:
                logging.debug("Colorspace is OK: %s", color)