Test cases for transparency

Test support on macOS
Always initialize smask
2021-08-19 17:42:46 +02:00 · 2021-08-19 17:42:43 +02:00 · 2021-08-19 17:39:10 +02:00 · 2021-08-19 17:39:10 +02:00 · 2021-08-19 17:39:10 +02:00 · 2021-08-19 17:39:10 +02:00
3 changed files with 541 additions and 343 deletions
--- a/README.md
+++ b/README.md
@ -80,7 +80,17 @@ Bugs
 - Input images with alpha channels are not allowed. PDF only supports
   transparency using binary masks but is unable to store 8-bit transparency
   information as part of the image itself. But img2pdf will always be lossless
-   and thus, input images must not carry transparency information.
+   and thus, input images must not carry transparency information. You can
+   remove the alpha channel for example with imagemagick:
+
+    convert input.png -background white -alpha remove -alpha off output.png
+
+ - An error is produced if the input image is broken. This commonly happens if
+   the input image has an invalid EXIF Orientation value of zero. Even though
+   only nine different values from 1 to 9 are permitted, Android phones and
+   Canon DSLR cameras produce JPEG images with the invalid value of zero.
+   Either fix your input images with `exiftool` or similar software before
+   passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid`.

 - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the
   input if necessary. To prevent decompression bomb denial of service attacks,
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -85,9 +85,9 @@ FitMode = Enum("FitMode", "into fill exact shrink enlarge")

 PageOrientation = Enum("PageOrientation", "portrait landscape")

-Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other")
+Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P other")

-ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF MPO other")
+ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")

 PageMode = Enum("PageMode", "none outlines thumbs")

@ -742,6 +742,7 @@ class pdfdoc(object):
        imgheightpx,
        imgformat,
        imgdata,
+        smaskdata,
        imgwidthpdf,
        imgheightpdf,
        imgxpdf,
@ -759,6 +760,11 @@ class pdfdoc(object):
        artborder=None,
        iccp=None,
    ):
+        assert (
+            (color != Colorspace.RGBA and color != Colorspace.LA)
+            or (imgformat == ImageFormat.PNG and smaskdata is not None)
+        )
+
        if self.engine == Engine.pikepdf:
            PdfArray = pikepdf.Array
            PdfDict = pikepdf.Dictionary
@ -777,9 +783,9 @@ class pdfdoc(object):
        TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true")
        FalseObject = False if self.engine == Engine.pikepdf else PdfObject("false")

-        if color == Colorspace["1"] or color == Colorspace.L:
+        if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
            colorspace = PdfName.DeviceGray
-        elif color == Colorspace.RGB:
+        elif color == Colorspace.RGB or color == Colorspace.RGBA:
            colorspace = PdfName.DeviceRGB
        elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
            colorspace = PdfName.DeviceCMYK
@ -816,9 +822,9 @@ class pdfdoc(object):
            else:
                iccpdict = PdfDict(stream=convert_load(iccp))
            iccpdict[PdfName.Alternate] = colorspace
-            if color == Colorspace["1"] or color == Colorspace.L:
+            if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
                iccpdict[PdfName.N] = 1
-            elif color == Colorspace.RGB:
+            elif color == Colorspace.RGB or color == Colorspace.RGBA:
                iccpdict[PdfName.N] = 3
            elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
                iccpdict[PdfName.N] = 4
@ -852,6 +858,8 @@ class pdfdoc(object):
        image[PdfName.ColorSpace] = colorspace
        image[PdfName.BitsPerComponent] = depth

+        smask = None
+
        if color == Colorspace["CMYK;I"]:
            # Inverts all four channels
            image[PdfName.Decode] = [1, 0, 1, 0, 1, 0, 1, 0]
@ -869,9 +877,35 @@ class pdfdoc(object):
            decodeparms[PdfName.Rows] = imgheightpx
            image[PdfName.DecodeParms] = [decodeparms]
        elif imgformat is ImageFormat.PNG:
+            if smaskdata is not None:
+                if self.engine == Engine.pikepdf:
+                    smask = self.writer.make_stream(smaskdata)
+                else:
+                    smask = PdfDict(stream=convert_load(smaskdata))
+                smask[PdfName.Type] = PdfName.XObject
+                smask[PdfName.Subtype] = PdfName.Image
+                smask[PdfName.Filter] = PdfName.FlateDecode
+                smask[PdfName.Width] = imgwidthpx
+                smask[PdfName.Height] = imgheightpx
+                smask[PdfName.ColorSpace] = PdfName.DeviceGray
+                smask[PdfName.BitsPerComponent] = depth
+
+                decodeparms = PdfDict()
+                decodeparms[PdfName.Predictor] = 15
+                decodeparms[PdfName.Colors] = 1
+                decodeparms[PdfName.Columns] = imgwidthpx
+                decodeparms[PdfName.BitsPerComponent] = depth
+                smask[PdfName.DecodeParms] = decodeparms
+
+                image[PdfName.SMask] = smask
+
+                # /SMask requires PDF 1.4
+                if self.output_version < "1.4":
+                    self.output_version = "1.4"
+
            decodeparms = PdfDict()
            decodeparms[PdfName.Predictor] = 15
-            if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
+            if color in [Colorspace.P, Colorspace["1"], Colorspace.L, Colorspace.LA]:
                decodeparms[PdfName.Colors] = 1
            else:
                decodeparms[PdfName.Colors] = 3
@ -954,6 +988,8 @@ class pdfdoc(object):
            if self.engine == Engine.internal:
                self.writer.addobj(content)
                self.writer.addobj(image)
+                if smask is not None:
+                    self.writer.addobj(smask)
                if iccp is not None:
                    self.writer.addobj(iccpdict)

@ -1183,8 +1219,21 @@ def get_imgmetadata(
        # Search online for the 72.009 dpi problem for more info.
        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
        ics = imgdata.mode
-
-    if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
+    
+    # GIF and PNG files with transparency are supported
+    if (
+        (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF)
+        and (ics in ["RGBA", "LA"] or "transparency" in imgdata.info)
+    ):
+        # Must check the IHDR chunk for the bit depth, because PIL would lossily
+        # convert 16-bit RGBA/LA images to 8-bit.
+        if imgformat == ImageFormat.PNG and rawdata is not None:
+            depth = rawdata[24]
+            if depth > 8:
+                logger.warning("Image with transparency and a bit depth of %d." % depth)
+                logger.warning("This is unsupported due to PIL limitations.")
+                raise AlphaChannelError("Refusing to work with multiple >8bit channels.")
+    elif (ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info):
        logger.warning("Image contains transparency which cannot be retained in PDF.")
        logger.warning("img2pdf will not perform a lossy operation.")
        logger.warning("You can remove the alpha channel using imagemagick:")
@ -1427,6 +1476,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                ndpi,
                imgformat,
                rawdata,
+                None,
                imgwidthpx,
                imgheightpx,
                [],
@ -1483,6 +1533,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                    ndpi,
                    ImageFormat.JPEG,
                    rawdata[offset : offset + mpent["Size"]],
+                    None,
                    imgwidthpx,
                    imgheightpx,
                    [],
@ -1507,31 +1558,37 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
        color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
            imgdata, imgformat, default_dpi, colorspace, rawdata, rot
        )
-        pngidat, palette = parse_png(rawdata)
-        # PIL does not provide the information about the original bits per
-        # sample. Thus, we retrieve that info manually by looking at byte 9 in
-        # the IHDR chunk. We know where to find that in the file because the
-        # IHDR chunk must be the first chunk
-        depth = rawdata[24]
-        if depth not in [1, 2, 4, 8, 16]:
-            raise ValueError("invalid bit depth: %d" % depth)
-        logger.debug("read_images() embeds a PNG")
-        cleanup()
-        return [
-            (
-                color,
-                ndpi,
-                imgformat,
-                pngidat,
-                imgwidthpx,
-                imgheightpx,
-                palette,
-                False,
-                depth,
-                rotation,
-                iccp,
-            )
-        ]
+        if (
+            color != Colorspace.RGBA
+            and color != Colorspace.LA
+            and "transparency" not in imgdata.info
+        ):
+            pngidat, palette = parse_png(rawdata)
+            # PIL does not provide the information about the original bits per
+            # sample. Thus, we retrieve that info manually by looking at byte 9 in
+            # the IHDR chunk. We know where to find that in the file because the
+            # IHDR chunk must be the first chunk
+            depth = rawdata[24]
+            if depth not in [1, 2, 4, 8, 16]:
+                raise ValueError("invalid bit depth: %d" % depth)
+            logger.debug("read_images() embeds a PNG")
+            cleanup()
+            return [
+                (
+                    color,
+                    ndpi,
+                    imgformat,
+                    pngidat,
+                    None,
+                    imgwidthpx,
+                    imgheightpx,
+                    palette,
+                    False,
+                    depth,
+                    rotation,
+                    iccp,
+                )
+            ]

    # If our input is not JPEG or PNG, then we might have a format that
    # supports multiple frames (like TIFF or GIF), so we need a loop to
@ -1615,6 +1672,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                    ndpi,
                    ImageFormat.CCITTGroup4,
                    rawdata,
+                    None,
                    imgwidthpx,
                    imgheightpx,
                    [],
@ -1644,6 +1702,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                        ndpi,
                        ImageFormat.CCITTGroup4,
                        ccittdata,
+                        None,
                        imgwidthpx,
                        imgheightpx,
                        [],
@ -1662,7 +1721,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                color = Colorspace.L
        elif color in [
            Colorspace.RGB,
+            Colorspace.RGBA,
            Colorspace.L,
+            Colorspace.LA,
            Colorspace.CMYK,
            Colorspace["CMYK;I"],
            Colorspace.P,
@ -1682,6 +1743,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                    ndpi,
                    imgformat,
                    imggz,
+                    None,
                    imgwidthpx,
                    imgheightpx,
                    [],
@ -1692,27 +1754,42 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
                )
            )
        else:
-            # cheapo version to retrieve a PNG encoding of the payload is to
-            # just save it with PIL. In the future this could be replaced by
-            # dedicated function applying the Paeth PNG filter to the raw pixel
-            pngbuffer = BytesIO()
-            newimg.save(pngbuffer, format="png")
-            pngidat, palette = parse_png(pngbuffer.getvalue())
-            # PIL does not provide the information about the original bits per
-            # sample. Thus, we retrieve that info manually by looking at byte 9 in
-            # the IHDR chunk. We know where to find that in the file because the
-            # IHDR chunk must be the first chunk
-            pngbuffer.seek(24)
-            depth = ord(pngbuffer.read(1))
-            if depth not in [1, 2, 4, 8, 16]:
-                raise ValueError("invalid bit depth: %d" % depth)
+            if (
+                color == Colorspace.RGBA
+                or color == Colorspace.LA
+                or "transparency" in newimg.info
+            ):
+                if color == Colorspace.RGBA:
+                    newcolor = color
+                    r, g, b, a = newimg.split()
+                    newimg = Image.merge("RGB", (r, g, b))
+                elif color == Colorspace.LA:
+                    newcolor = color
+                    l, a = newimg.split()
+                    newimg = l
+                else:
+                    newcolor = Colorspace.RGBA
+                    r, g, b, a = newimg.convert(mode="RGBA").split()
+                    newimg = Image.merge("RGB", (r, g, b))
+
+                smaskidat, _, _ = to_png_data(a)
+                logger.warning(
+                    "Image contains an alpha channel which will be stored "
+                    "as a separate soft mask (/SMask) image in PDF."
+                )
+            else:
+                newcolor = color
+                smaskidat = None
+
+            pngidat, palette, depth = to_png_data(newimg)
            logger.debug("read_images() encoded an image as PNG")
            result.append(
                (
-                    color,
+                    newcolor,
                    ndpi,
                    ImageFormat.PNG,
                    pngidat,
+                    smaskidat,
                    imgwidthpx,
                    imgheightpx,
                    palette,
@ -1726,6 +1803,23 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
    cleanup()
    return result

+def to_png_data(img):
+    # Re-encode img as PNG in memory and return (pngidat, palette, depth).
+    # Saving through PIL is the cheap way to get a PNG-encoded payload; a
+    # dedicated function applying the Paeth PNG filter could replace it later.
+    pngbuffer = BytesIO()
+    img.save(pngbuffer, format="png")
+
+    pngidat, palette = parse_png(pngbuffer.getvalue())
+    # PIL does not report the bits per sample, so read it from the encoded
+    # file instead: it is byte 9 of the IHDR chunk data, which lies at file
+    # offset 24 (8-byte signature + 4-byte length + 4-byte chunk type +
+    # 4-byte width + 4-byte height) because IHDR must be the first chunk.
+    pngbuffer.seek(24)
+    depth = ord(pngbuffer.read(1))
+    if depth not in [1, 2, 4, 8, 16]:
+        raise ValueError("invalid bit depth: %d" % depth)
+    return pngidat, palette, depth

 # converts a length in pixels to a length in PDF units (1/72 of an inch)
 def px_to_pt(length, dpi):
@ -2118,6 +2212,7 @@ def convert(*images, **kwargs):
            ndpi,
            imgformat,
            imgdata,
+            smaskdata,
            imgwidthpx,
            imgheightpx,
            palette,
@ -2171,6 +2266,7 @@ def convert(*images, **kwargs):
                imgheightpx,
                imgformat,
                imgdata,
+                smaskdata,
                imgwidthpdf,
                imgheightpdf,
                imgxpdf,
@ -2651,7 +2747,6 @@ def gui():

    args = {
        "engine": tkinter.StringVar(),
-        "first_frame_only": tkinter.BooleanVar(),
        "auto_orient": tkinter.BooleanVar(),
        "fit": tkinter.StringVar(),
        "title": tkinter.StringVar(),
@ -3611,8 +3706,9 @@ ifvalid, 0, 90, 180 and 270. The default value is auto and indicates that input
 images are rotated according to their EXIF Orientation tag. The values none and
 0 ignore the EXIF Orientation values of the input images. The value ifvalid
 acts like auto but ignores invalid EXIF rotation values and only issues a
-warning instead of throwing an error. The values 90, 180 and 270 perform a
-clockwise rotation of the image.
+warning instead of throwing an error. This is useful because many devices like
+Android phones, Canon cameras or scanners emit an invalid Orientation tag value
+of zero. The values 90, 180 and 270 perform a clockwise rotation of the image.
            """,
    )
    sizeargs.add_argument(
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
Author	SHA1	Message	Date
Tamás Zahola	8cbe03d486	Test cases for transparency	2021-08-19 17:42:46 +02:00
Tamás Zahola	968fc0c27a	Test support on macOS	2021-08-19 17:42:43 +02:00
Tamás Zahola	cfbb40b0f6	Always initialize `smask`	2021-08-19 17:39:10 +02:00
Tamás Zahola	ff03d9c1cd	Formatting	2021-08-19 17:39:10 +02:00
Tamás Zahola	e6613d3244	Use PNG predictor for /SMask too	2021-08-19 17:39:10 +02:00
Tamás Zahola	219dbd2856	Added transparency support for GIFs, palette-based PNGs and grayscale PNGs	2021-08-19 17:39:10 +02:00
Tamás Zahola	bf51768fb4	Convert 8-bit PNG alpha channels to /SMasks in PDF	2021-08-19 17:39:09 +02:00
Michal Vasilek	4c5b72dab0	src/img2pdf_test.py: skip tests when icc profiles are not present	2021-08-19 13:08:32 +02:00
Michal Vasilek	853a1ec363	src/img2pdf_test.py: do not test format descriptions	2021-08-19 10:59:46 +00:00
Johannes Schauer Marin Rodrigues	55d589a548	README.md: document how to remove alpha channel and --rotation=ifvalid	2021-08-14 10:19:46 +02:00
Johannes Schauer Marin Rodrigues	5c617965f5	document the ifvalid option value further	2021-08-14 10:19:06 +02:00
Johannes Schauer Marin Rodrigues	0067edf965	remove first_frame_only from gui	2021-08-14 10:18:15 +02:00