Break out TIFF G4 transcoding into its own function

8 years ago · c27505486e
parent 95f84f97bc
commit c27505486e
1 changed files with 52 additions and 23 deletions
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -609,6 +609,45 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
    return (color, ndpi, imgwidthpx, imgheightpx)
 def transcode_monochrome(imgdata):
    """Convert the open PIL.Image imgdata to compressed CCITT Group4 data

    The image is saved as a Group 4 compressed TIFF into an in-memory buffer
    and the bytes of its single data strip are read back and returned.

    Raises ValueError if the saved TIFF does not report 'group4' compression
    or contains no data strip, and NotImplementedError if it contains more
    than one strip. Exceptions raised by Pillow while saving or re-opening
    the image are not caught here.
    """
    from PIL import TiffImagePlugin

    logging.debug("Converting monochrome to CCITT Group4")

    # Convert the image to Group 4 in memory. If libtiff is not installed and
    # Pillow is not compiled against it, .save() will raise an exception.
    newimgio = BytesIO()
    imgdata.save(newimgio, format='TIFF', compression='group4')

    # Re-open the freshly written TIFF from memory so its tags can be read.
    newimgio.seek(0)
    newimg = Image.open(newimgio)

    # If Pillow is passed an invalid compression argument it will ignore it;
    # make sure the image actually got compressed. Use .get() so that a
    # missing 'compression' entry is reported with the same ValueError
    # instead of an unrelated KeyError.
    if newimg.info.get('compression') != 'group4':
        raise ValueError("Image not compressed as expected")

    # Read the TIFF tags to find the offset(s) of the compressed data strips.
    strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
    strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]

    # Without at least one offset/length pair there is nothing to extract;
    # fail with a clear message rather than an IndexError further down.
    if not strip_offsets or not strip_bytes:
        raise ValueError("No compressed data strip found in image")

    # PIL always seems to create a single strip even for very large TIFFs when
    # it saves images, so assume we only have to read a single strip.
    # A test ~10 GPixel image was still encoded as a single strip. Just to be
    # safe, throw an error if there is more than one offset.
    if len(strip_offsets) > 1:
        raise NotImplementedError("Transcoding multiple strips not supported")

    # Extract the raw Group 4 payload of the single strip.
    newimgio.seek(strip_offsets[0])
    return newimgio.read(strip_bytes[0])
 def read_images(rawdata, colorspace, first_frame_only=False):
    im = BytesIO(rawdata)
    im.seek(0)
@ -663,30 +702,20 @@ def read_images(rawdata, colorspace, first_frame_only=False):
            color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                    imgdata, imgformat, default_dpi, colorspace)
            newimg = None
            if color == Colorspace['1']:
-                logging.debug("Converting monochrome to CCITT Group4")
+                try:
-                # Convert the image to Group 4 in memory
+                    ccittdata = transcode_monochrome(imgdata)
-                newimgio = BytesIO()
+                    imgformat = ImageFormat.CCITTGroup4
-                imgdata.save(newimgio, format='TIFF', compression='group4')
+                    result.append((color, ndpi, imgformat, ccittdata,
-
+                        imgwidthpx, imgheightpx))
-                # Open new image in memory
+                    img_page_count += 1
-                newimgio.seek(0)
+                    continue
-                newimg = Image.open(newimgio)
+                except Exception as e:
-
+                    logging.debug(e)
-                # Obtain tags
+                    logging.debug("Converting colorspace 1 to L")
-                strip_offsets = newimg.tag_v2[273]
+                    newimg = imgdata.convert('L')
-                strip_bytes = newimg.tag_v2[279]
+                    color = Colorspace.L
                rows_per_strip = newimg.tag_v2[278]
                newimgio.seek(strip_offsets[0])
                ccittdata = newimgio.read(strip_bytes[0])
                logging.debug("Extracted %i bytes from image" % len(ccittdata))
                imgformat = ImageFormat.CCITTGroup4
                result.append((color, ndpi, imgformat, ccittdata,
                    imgwidthpx, imgheightpx))
                img_page_count += 1
                continue
            elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
                           Colorspace["CMYK;I"]]:
                logging.debug("Colorspace is OK: %s", color)