Use the resolution (DPI) from EXIF first; if it is missing or invalid, fall back to the info from the Pillow image

2023-11-30 13:16:50 +08:00
8 changed files with 193 additions and 384 deletions
--- a/CHANGES.rst
+++ b/CHANGES.rst
@ -2,13 +2,6 @@
 CHANGES
 =======

-0.6.0 (2025-02-15)
------------------
-
- - Add support for JBIG2 (generic coding)
- - Add convert_to_docobject() broken out from convert()
- - Add pil_get_dpi() broken out from get_imgmetadata()
-
 0.5.1 (2023-11-26)
 ------------------

--- a/4
+++ b/4
@ -31,10 +31,6 @@ Making a new release
    $ python3 setup.py sdist
    $ twine upload dist/*

- - Push everything to git forge
-
-    $ git push
-
 Using debbisect to find regressions
 -----------------------------------

--- a/README.md
+++ b/README.md
@ -27,20 +27,18 @@ software, because the raw pixel data never has to be loaded into memory.
 The following table shows how img2pdf handles different input depending on the
 input file format and image color space.

-| Format                                | Colorspace                           | Result        |
-| ------------------------------------- | ------------------------------------ | ------------- |
-| JPEG                                  | any                                  | direct        |
-| JPEG2000                              | any                                  | direct        |
-| PNG (non-interlaced, no transparency) | any                                  | direct        |
-| TIFF (CCITT Group 4)                  | 1-bit monochrome                     | direct        |
-| JBIG2 (single-page generic coding)    | 1-bit monochrome                     | direct        |
-| any                                   | any except CMYK and 1-bit monochrome | PNG Paeth     |
-| any                                   | 1-bit monochrome                     | CCITT Group 4 |
-| any                                   | CMYK                                 | flate         |
+| Format                                | Colorspace                     | Result        |
+| ------------------------------------- | ------------------------------ | ------------- |
+| JPEG                                  | any                            | direct        |
+| JPEG2000                              | any                            | direct        |
+| PNG (non-interlaced, no transparency) | any                            | direct        |
+| TIFF (CCITT Group 4)                  | monochrome                     | direct        |
+| any                                   | any except CMYK and monochrome | PNG Paeth     |
+| any                                   | monochrome                     | CCITT Group 4 |
+| any                                   | CMYK                           | flate         |

-For JPEG, JPEG2000, non-interlaced PNG, TIFF images with CCITT Group 4
-encoded data, and JBIG2 with single-page generic coding (e.g. using `jbig2enc`),
-img2pdf directly embeds the image data into the PDF without
+For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4
+encoded data, img2pdf directly embeds the image data into the PDF without
 re-encoding it. It thus treats the PDF format merely as a container format for
 the image data. In these cases, img2pdf only increases the filesize by the size
 of the PDF container (typically around 500 to 700 bytes). Since data is only
@ -49,7 +47,7 @@ solutions for these input formats.

 For all other input types, img2pdf first has to transform the pixel data to
 make it compatible with PDF. In most cases, the PNG Paeth filter is applied to
-the pixel data. For 1-bit monochrome input, CCITT Group 4 is used instead. Only for
+the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for
 CMYK input no filter is applied before finally applying flate compression.

 Usage
@ -67,12 +65,6 @@ The detailed documentation can be accessed by running:

 	$ img2pdf --help

-With no command line arguments supplied, img2pdf will read a single image from
-standard input and write the resulting PDF to standard output. Here is an
-example for how to scan directly to PDF using scanimage(1) from SANE:
-
-	$ scanimage --mode=Color --resolution=300 | pnmtojpeg -quality 90 | img2pdf > scan.pdf
-
 Bugs
 ----

--- a/setup.py
+++ b/setup.py
@ -1,7 +1,7 @@
 import sys
 from setuptools import setup

-VERSION = "0.6.0"
+VERSION = "0.5.1"

 INSTALL_REQUIRES = (
    "Pillow",
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -62,7 +62,7 @@ try:
 except ImportError:
    have_pikepdf = False

-__version__ = "0.6.0"
+__version__ = "0.5.1"
 default_dpi = 96.0
 papersizes = {
    "letter": "8.5inx11in",
@ -128,7 +128,7 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
 Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")

 ImageFormat = Enum(
-    "ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF JBIG2 other"
+    "ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
 )

 PageMode = Enum("PageMode", "none outlines thumbs")
@ -918,11 +918,6 @@ class pdfdoc(object):
            self.output_version = "1.5"  # jpeg2000 needs pdf 1.5
        elif imgformat is ImageFormat.CCITTGroup4:
            ofilter = [PdfName.CCITTFaxDecode]
-        elif imgformat is ImageFormat.JBIG2:
-            ofilter = PdfName.JBIG2Decode
-            # JBIG2Decode requires PDF 1.4
-            if self.output_version < "1.4":
-                self.output_version = "1.4"
        else:
            ofilter = PdfName.FlateDecode

@ -1080,7 +1075,7 @@ class pdfdoc(object):
        self.tostream(stream)
        return stream.getvalue()

-    def finalize(self):
+    def tostream(self, outputstream):
        if self.engine == Engine.pikepdf:
            PdfArray = pikepdf.Array
            PdfDict = pikepdf.Dictionary
@ -1272,9 +1267,7 @@ class pdfdoc(object):
                self.writer.addobj(metadata)
                self.writer.addobj(iccstream)

-    def tostream(self, outputstream):
-        # write out the PDF
-        # this assumes that finalize() has been invoked beforehand by the caller
+        # now write out the PDF
        if self.engine == Engine.pikepdf:
            kwargs = {}
            if pikepdf.__version__ >= "6.2.0":
@ -1283,8 +1276,6 @@ class pdfdoc(object):
                outputstream, min_version=self.output_version, linearize=True, **kwargs
            )
        elif self.engine == Engine.pdfrw:
-            from pdfrw import PdfName, PdfArray
-
            self.writer.trailer.Info = self.writer.docinfo
            # setting the version attribute of the pdfrw PdfWriter object will
            # influence the behaviour of the write() function
@ -1304,27 +1295,82 @@ class pdfdoc(object):
            raise ValueError("unknown engine: %s" % self.engine)


-def pil_get_dpi(imgdata, imgformat, default_dpi):
-    ndpi = imgdata.info.get("dpi")
-    if ndpi is None:
-        # the PNG plugin of PIL adds the undocumented "aspect" field instead of
-        # the "dpi" field if the PNG pHYs chunk unit is not set to meters
-        if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
-            aspect = imgdata.info["aspect"]
-            # make sure not to go below the default dpi
-            if aspect[0] > aspect[1]:
-                ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
-            else:
-                ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
-        else:
-            ndpi = (default_dpi, default_dpi)
+def get_imgmetadata(
+    imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None
+):

-    # In python3, the returned dpi value for some tiff images will
-    # not be an integer but a float. To make the behaviour of
-    # img2pdf the same between python2 and python3, we convert that
-    # float into an integer by rounding.
-    # Search online for the 72.009 dpi problem for more info.
-    ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
+    if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
+        # this codepath gets called if the PIL installation is not able to
+        # handle JPEG2000 files
+        imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
+
+        if hdpi is None:
+            hdpi = default_dpi
+        if vdpi is None:
+            vdpi = default_dpi
+        ndpi = (hdpi, vdpi)
+    else:
+        imgwidthpx, imgheightpx = imgdata.size
+
+        ndpi = None
+        # For JPEG images with both EXIF and JFIF tags, Pillow seems to read the image resolution from JFIF.
+        # However, "Preview" on macOS and "Photos" on Windows read the resolution from EXIF,
+        # so we try to read the value from EXIF first.
+        exif = imgdata.getexif()
+        if exif:
+            exif_res_unit = exif.get(ExifTags.Base.ResolutionUnit)
+            exif_x_res = exif.get(ExifTags.Base.XResolution)
+            exif_y_res = exif.get(ExifTags.Base.YResolution)
+            if exif_x_res and exif_y_res:
+                if (exif_res_unit == 3): # cm
+                    ndpi = (exif_x_res * 2.54, exif_y_res * 2.54)
+                else:
+                    ndpi = (exif_x_res, exif_y_res)
+
+        # if no DPI from EXIF, get it from `info`
+        if ndpi is None:
+            ndpi = imgdata.info.get("dpi")
+
+        if ndpi is None:
+            # the PNG plugin of PIL adds the undocumented "aspect" field instead of
+            # the "dpi" field if the PNG pHYs chunk unit is not set to meters
+            if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
+                aspect = imgdata.info["aspect"]
+                # make sure not to go below the default dpi
+                if aspect[0] > aspect[1]:
+                    ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
+                else:
+                    ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
+            else:
+                ndpi = (default_dpi, default_dpi)
+        # In python3, the returned dpi value for some tiff images will
+        # not be an integer but a float. To make the behaviour of
+        # img2pdf the same between python2 and python3, we convert that
+        # float into an integer by rounding.
+        # Search online for the 72.009 dpi problem for more info.
+        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
+        ics = imgdata.mode
+
+    # GIF and PNG files with transparency are supported
+    if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
+        ics in ["RGBA", "LA"] or "transparency" in imgdata.info
+    ):
+        # Must check the IHDR chunk for the bit depth, because PIL would lossily
+        # convert 16-bit RGBA/LA images to 8-bit.
+        if imgformat == ImageFormat.PNG and rawdata is not None:
+            depth = rawdata[24]
+            if depth > 8:
+                logger.warning("Image with transparency and a bit depth of %d." % depth)
+                logger.warning("This is unsupported due to PIL limitations.")
+                logger.warning(
+                    "If you accept a lossy conversion, you can manually convert "
+                    "your images to 8 bit using `convert -depth 8` from imagemagick"
+                )
+                raise AlphaChannelError(
+                    "Refusing to work with multiple >8bit channels."
+                )
+    elif ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
+        raise AlphaChannelError("This function must not be called on images with alpha")

    # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
    # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
@ -1341,112 +1387,11 @@ def pil_get_dpi(imgdata, imgformat, default_dpi):
            imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi),
        )

-    return ndpi
-
-
-def get_imgmetadata(
-    imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None
-):
-    if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
-        # this codepath gets called if the PIL installation is not able to
-        # handle JPEG2000 files
-        imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
-
-        if hdpi is None:
-            hdpi = default_dpi
-        if vdpi is None:
-            vdpi = default_dpi
-        ndpi = (hdpi, vdpi)
-    elif imgformat == ImageFormat.JBIG2:
-        imgwidthpx, imgheightpx, xres, yres = struct.unpack(">IIII", rawdata[24:40])
-        INCH_PER_METER = 39.370079
-        if xres == 0:
-            hdpi = default_dpi
-        elif xres < 1000:
-            # If xres is very small, it's likely accidentally expressed in dpi instead
-            # of dpm. See e.g. https://github.com/agl/jbig2enc/issues/86
-            hdpi = xres
-        else:
-            hdpi = int(float(xres) / INCH_PER_METER)
-        if yres == 0:
-            vdpi = default_dpi
-        elif yres < 1000:
-            vdpi = yres
-        else:
-            vdpi = int(float(yres) / INCH_PER_METER)
-        ndpi = (hdpi, vdpi)
-        ics = "1"
-    else:
-        imgwidthpx, imgheightpx = imgdata.size
-        ndpi = pil_get_dpi(imgdata, imgformat, default_dpi)
-        ics = imgdata.mode
-
    logger.debug("input dpi = %d x %d", *ndpi)

-    # GIF and PNG files with transparency are supported
-    if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
-        ics in ["RGBA", "LA"]
-        or (imgdata is not None and "transparency" in imgdata.info)
-    ):
-        # Must check the IHDR chunk for the bit depth, because PIL would lossily
-        # convert 16-bit RGBA/LA images to 8-bit.
-        if imgformat == ImageFormat.PNG and rawdata is not None:
-            depth = rawdata[24]
-            if depth > 8:
-                logger.warning("Image with transparency and a bit depth of %d." % depth)
-                logger.warning("This is unsupported due to PIL limitations.")
-                logger.warning(
-                    "If you accept a lossy conversion, you can manually convert "
-                    "your images to 8 bit using `convert -depth 8` from imagemagick"
-                )
-                raise AlphaChannelError(
-                    "Refusing to work with multiple >8bit channels."
-                )
-    elif ics in ["LA", "PA", "RGBA"] or (
-        imgdata is not None and "transparency" in imgdata.info
-    ):
-        raise AlphaChannelError("This function must not be called on images with alpha")
-
    rotation = 0
    if rotreq in (None, Rotation.auto, Rotation.ifvalid):
-        if hasattr(imgdata, "getexif") and imgdata.getexif() is not None:
-            exif_dict = imgdata.getexif()
-            o_key = ExifTags.Base.Orientation.value  # 274 rsp. 0x112
-            if exif_dict and o_key in exif_dict:
-                # Detailed information on EXIF rotation tags:
-                # http://impulseadventure.com/photo/exif-orientation.html
-                value = exif_dict[o_key]
-                if value == 1:
-                    rotation = 0
-                elif value == 6:
-                    rotation = 90
-                elif value == 3:
-                    rotation = 180
-                elif value == 8:
-                    rotation = 270
-                elif value in (2, 4, 5, 7):
-                    if rotreq == Rotation.ifvalid:
-                        logger.warning(
-                            "Unsupported flipped rotation mode (%d): use "
-                            "--rotation=ifvalid or "
-                            "rotation=img2pdf.Rotation.ifvalid to ignore",
-                            value,
-                        )
-                    else:
-                        raise ExifOrientationError(
-                            "Unsupported flipped rotation mode (%d): use "
-                            "--rotation=ifvalid or "
-                            "rotation=img2pdf.Rotation.ifvalid to ignore" % value
-                        )
-                else:
-                    if rotreq == Rotation.ifvalid:
-                        logger.warning("Invalid rotation (%d)", value)
-                    else:
-                        raise ExifOrientationError(
-                            "Invalid rotation (%d): use --rotation=ifvalid "
-                            "or rotation=img2pdf.Rotation.ifvalid to ignore" % value
-                        )
-        elif hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
+        if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
            for tag, value in imgdata._getexif().items():
                if TAGS.get(tag, tag) == "Orientation":
                    # Detailed information on EXIF rotation tags:
@ -1481,7 +1426,6 @@ def get_imgmetadata(
                                "Invalid rotation (%d): use --rotation=ifvalid "
                                "or rotation=img2pdf.Rotation.ifvalid to ignore" % value
                            )
-
    elif rotreq in (Rotation.none, Rotation["0"]):
        rotation = 0
    elif rotreq == Rotation["90"]:
@ -1530,7 +1474,7 @@ def get_imgmetadata(
        logger.debug("input colorspace = %s", color.name)

    iccp = None
-    if imgdata is not None and "icc_profile" in imgdata.info:
+    if "icc_profile" in imgdata.info:
        iccp = imgdata.info.get("icc_profile")
    # GIMP saves bilevel TIFF images and palette PNG images with only black and
    # white in the palette with an RGB ICC profile which is useless
@ -1699,7 +1643,6 @@ miff_re = re.compile(
    re.VERBOSE,
 )

-
 # https://imagemagick.org/script/miff.php
 # turn off black formatting until python 3.10 is available on more platforms
 # and we can use match/case
@ -1881,6 +1824,8 @@ def parse_miff(data):
                results.extend(parse_miff(rest[lenpal + lenimgdata :]))
    return results
 # fmt: on
+
+
 def read_images(
    rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
 ):
@ -1894,51 +1839,7 @@ def read_images(
        if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
            # image is jpeg2000
            imgformat = ImageFormat.JPEG2000
-        elif rawdata[:8] == b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a":
-            # For now we only support single-page generic coding of JBIG2, for example as generated by
-            # https://github.com/agl/jbig2enc
-            #
-            # In fact, you can pipe an example image `like src/tests/input/mono.png` directly into img2pdf:
-            #   jbig2 src/tests/input/mono.png | img2pdf -o src/tests/output/mono.png.pdf
-            #
-            # For this we assume that the first 13 bytes are the JBIG file header describing a document with one page,
-            # followed by a "page information" segment describing the dimensions of that page.
-            #
-            # The following annotated `hexdump -C 042.jb2` shows the first 40 bytes that we inspect directly.
-            # The first 24 bytes (until "||") have to match exactly, while the following 16 bytes are read by get_imgmetadata.
-            #
-            # 97 4a 42 32 0d 0a 1a 0a  01 00 00 00 01 00 00 00
-            # \_____________________/  |  \_________/ \______
-            #       magic-bytes     org/unk  pages     seg-num
-            #
-            # 00 30 00 01 00 00 00 13  || 00 00 00 73 00 00 00 30
-            # _/ |  |   | \_________/  || \_________/ \_________/
-            # type refs page seg-size  ||  width-px    height-px
-            #
-            # 00 00 00 48 00 00 00 48
-            # \_________/ \_________/
-            #     xres       yres
-            #
-            # For more information on the data format, see:
-            # * https://github.com/agl/jbig2enc/blob/ea05019/fcd14492.pdf
-            # For more information about the generic coding, see:
-            # * https://github.com/agl/jbig2enc/blob/ea05019/src/jbig2enc.cc#L898
-            imgformat = ImageFormat.JBIG2
-            if (
-                rawdata[:24]
-                != b"\x97\x4a\x42\x32\x0d\x0a\x1a\x0a\x01\x00\x00\x00\x01\x00\x00\x00\x00\x30\x00\x01\x00\x00\x00\x13"
-            ):
-                raise ImageOpenError(
-                    "Unsupported JBIG2 format; only single-page generic coding is supported (e.g. from `jbig2enc`)."
-                )
-            if (
-                rawdata[-22:]
-                != b"\x00\x00\x00\x021\x00\x01\x00\x00\x00\x00\x00\x00\x00\x033\x00\x01\x00\x00\x00\x00"
-            ):
-                raise ImageOpenError(
-                    "Unsupported JBIG2 format; we expect end-of-page and end-of-file segments at the end (e.g. from `jbig2enc`)."
-                )
-        elif rawdata[:14].lower() == b"id=imagemagick":
+        if rawdata[:14].lower() == b"id=imagemagick":
            # image is in MIFF format
            # this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
            # we need PIL but PIL cannot do 16 bit
@ -1950,7 +1851,12 @@ def read_images(
            )
    else:
        logger.debug("PIL format = %s", imgdata.format)
-        imgformat = getattr(ImageFormat, imgdata.format, ImageFormat.other)
+        imgformat = None
+        for f in ImageFormat:
+            if f.name == imgdata.format:
+                imgformat = f
+        if imgformat is None:
+            imgformat = ImageFormat.other

    def cleanup():
        if imgdata is not None:
@ -2179,28 +2085,6 @@ def read_images(
                    )
                ]

-    if imgformat == ImageFormat.JBIG2:
-        color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
-            imgdata, imgformat, default_dpi, colorspace, rawdata, rot
-        )
-        streamdata = rawdata[13:-22]  # Strip file header and footer
-        return [
-            (
-                color,
-                ndpi,
-                imgformat,
-                streamdata,
-                None,
-                imgwidthpx,
-                imgheightpx,
-                [],
-                False,
-                1,
-                rotation,
-                iccp,
-            )
-        ]
-
    if imgformat == ImageFormat.MIFF:
        return parse_miff(rawdata)

@ -2740,11 +2624,14 @@ def find_scale(pagewidth, pageheight):
    return 10 ** ceil(log10(oversized))


-# Convert the image(s) to a `pdfdoc` object.
-# The `.writer` attribute holds the underlying engine document handle, and
-# `.output_version` the minimum version the caller should use when saving.
-# The main convert() wraps this implementation function.
-def convert_to_docobject(*images, **kwargs):
+# Given one or more input images, depending on outputstream, either return a
+# string containing the whole PDF if outputstream is None, or write the PDF
+# data to the given file-like object and return None.
+#
+# Input images can be given as file-like objects (they must implement read()),
+# as a binary string representing the image content, or as filenames of the
+# images.
+def convert(*images, **kwargs):
    _default_kwargs = dict(
        engine=None,
        title=None,
@ -2765,6 +2652,7 @@ def convert_to_docobject(*images, **kwargs):
        viewer_fit_window=False,
        viewer_center_window=False,
        viewer_fullscreen=False,
+        outputstream=None,
        first_frame_only=False,
        allow_oversized=True,
        cropborder=None,
@ -2927,22 +2815,10 @@ def convert_to_docobject(*images, **kwargs):
                iccp,
            )

-    pdf.finalize()
-    return pdf
-
-
-# given one or more input image, depending on outputstream, either return a
-# string containing the whole PDF if outputstream is None or write the PDF
-# data to the given file-like object and return None
-#
-# Input images can be given as file like objects (they must implement read()),
-# as a binary string representing the image content or as filenames to the
-# images.
-def convert(*images, outputstream=None, **kwargs):
-    pdf = convert_to_docobject(*images, **kwargs)
-    if outputstream:
-        pdf.tostream(outputstream)
+    if kwargs["outputstream"]:
+        pdf.tostream(kwargs["outputstream"])
        return
+
    return pdf.tostring()


@ -4102,10 +3978,6 @@ Examples:

    $ img2pdf --output out.pdf page1.jpg page2.jpg

-  Use a custom dpi value for the input images:
-
-    $ img2pdf --output out.pdf --imgsize 300dpi page1.jpg page2.jpg
-
  Convert a directory of JPEG images into a PDF with printable A4 pages in
  landscape mode. On each page, the photo takes the maximum amount of space
  while preserving its aspect ratio and a print border of 2 cm on the top and
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
@ -2441,10 +2441,9 @@ def tiff_float_img(tmp_path_factory, tmp_normal_png):
        identify[0]["image"].get("properties", {}).get("quantum:format")
        == "floating-point"
    ), str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -2491,10 +2490,9 @@ def tiff_cmyk8_img(tmp_path_factory, tmp_normal_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "separated"
@ -2544,10 +2542,9 @@ def tiff_cmyk16_img(tmp_path_factory, tmp_normal_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "separated"
@ -2587,10 +2584,9 @@ def tiff_rgb8_img(tmp_path_factory, tmp_normal_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -2637,10 +2633,9 @@ def tiff_rgb12_img(tmp_path_factory, tmp_normal16_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -2687,10 +2682,9 @@ def tiff_rgb14_img(tmp_path_factory, tmp_normal16_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -2737,10 +2731,9 @@ def tiff_rgb16_img(tmp_path_factory, tmp_normal16_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -2887,10 +2880,9 @@ def tiff_gray1_img(tmp_path_factory, tmp_gray1_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "min-is-black"
@ -2938,10 +2930,9 @@ def tiff_gray2_img(tmp_path_factory, tmp_gray2_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "min-is-black"
@ -2989,10 +2980,9 @@ def tiff_gray4_img(tmp_path_factory, tmp_gray4_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "min-is-black"
@ -3040,10 +3030,9 @@ def tiff_gray8_img(tmp_path_factory, tmp_gray8_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "min-is-black"
@ -3091,10 +3080,9 @@ def tiff_gray16_img(tmp_path_factory, tmp_gray16_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric")
        == "min-is-black"
@ -3144,10 +3132,9 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -3178,10 +3165,9 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
    ), str(identify)
@ -3223,10 +3209,9 @@ def tiff_palette1_img(tmp_path_factory, tmp_palette1_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
    ), str(identify)
@ -3267,10 +3252,9 @@ def tiff_palette2_img(tmp_path_factory, tmp_palette2_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
    ), str(identify)
@ -3311,10 +3295,9 @@ def tiff_palette4_img(tmp_path_factory, tmp_palette4_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
    ), str(identify)
@ -3354,10 +3337,9 @@ def tiff_palette8_img(tmp_path_factory, tmp_palette8_png):
        "x": 0,
        "y": 0,
    }, str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert (
        identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
    ), str(identify)
@ -3416,10 +3398,9 @@ def tiff_ccitt_lsb_m2l_white_img(tmp_path_factory, tmp_gray1_png):
        "y": 0,
    }, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
        identify
    )
@ -3499,10 +3480,9 @@ def tiff_ccitt_msb_m2l_white_img(tmp_path_factory, tmp_gray1_png):
        "y": 0,
    }, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "msb", str(
        identify
    )
@ -3582,10 +3562,9 @@ def tiff_ccitt_msb_l2m_white_img(tmp_path_factory, tmp_gray1_png):
        "y": 0,
    }, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "msb", str(
        identify
    )
@ -3670,10 +3649,9 @@ def tiff_ccitt_lsb_m2l_black_img(tmp_path_factory, tmp_gray1_png):
        "y": 0,
    }, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
        identify
    )
@ -3762,10 +3740,9 @@ def tiff_ccitt_nometa1_img(tmp_path_factory, tmp_gray1_png):
        "y": 0,
    }, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
        identify
    )
@ -3843,10 +3820,9 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
    assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
    assert identify[0]["image"].get("depth") == 1, str(identify)
    assert identify[0]["image"].get("compression") == "Group4", str(identify)
-    assert identify[0]["image"].get("properties", {}).get("tiff:alpha") in [
-        "unspecified",
-        None,
-    ], str(identify)
+    assert (
+        identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
+    ), str(identify)
    assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
        identify
    )
@ -5619,7 +5595,7 @@ def test_jpg_2000(tmp_path_factory, jpg_2000_img, jpg_2000_pdf):
 def test_jpg_2000_rgba8(tmp_path_factory, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf):
    tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba8")
    compare_ghostscript(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
-    # compare_poppler(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+    compare_poppler(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
    # compare_mupdf(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
    compare_pdfimages_jp2(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)

@ -6427,9 +6403,11 @@ def test_tiff_ccitt_nometa2(
 )
 def test_miff_cmyk8(tmp_path_factory, miff_cmyk8_img, tiff_cmyk8_img, miff_cmyk8_pdf):
    tmpdir = tmp_path_factory.mktemp("miff_cmyk8")
-    compare_ghostscript(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, gsdevice="tiff32nc")
+    compare_ghostscript(
+        tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, gsdevice="tiff32nc", exact=False
+    )
    # not testing with poppler as it cannot write CMYK images
-    compare_mupdf(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, cmyk=True)
+    compare_mupdf(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, exact=False, cmyk=True)
    compare_pdfimages_tiff(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf)


@ -7009,12 +6987,7 @@ def test_general(general_input, engine):
    assert x.Root.Type == "/Catalog"
    assert sorted(x.Root.Pages.keys()) == ["/Count", "/Kids", "/Type"]
    assert x.Root.Pages.Type == "/Pages"
-    if f.endswith(".jb2"):
-        # PIL doens't support .jb2, so we load the original .png, which
-        # was converted to the .jb2 using `jbig2enc`.
-        orig_img = Image.open(f.replace(".jb2", ".png"))
-    else:
-        orig_img = Image.open(f)
+    orig_img = Image.open(f)
    for pagenum in range(len(x.Root.Pages.Kids)):
        # retrieve the original image frame that this page was
        # generated from
@ -7022,8 +6995,6 @@ def test_general(general_input, engine):
        cur_page = x.Root.Pages.Kids[pagenum]

        ndpi = orig_img.info.get("dpi", (96.0, 96.0))
-        if ndpi[0] <= 0.001 or ndpi[1] <= 0.001:
-            ndpi = (96.0, 96.0)
        # In python3, the returned dpi value for some tiff images will
        # not be an integer but a float. To make the behaviour of
        # img2pdf the same between python2 and python3, we convert that
@ -7073,7 +7044,6 @@ def test_general(general_input, engine):
            "/JPXDecode",
            "/FlateDecode",
            pikepdf.Array([pikepdf.Name.CCITTFaxDecode]),
-            "/JBIG2Decode",
        ]

        # test if the image has correct size
@ -7083,10 +7053,6 @@ def test_general(general_input, engine):
        # verbatim into the PDF
        if imgprops.Filter in ["/DCTDecode", "/JPXDecode"]:
            assert cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata
-        elif imgprops.Filter == "/JBIG2Decode":
-            assert (
-                cur_page.Resources.XObject.Im0.read_raw_bytes() == orig_imgdata[13:-22]
-            )  # Strip file header and footer.
        elif imgprops.Filter == pikepdf.Array([pikepdf.Name.CCITTFaxDecode]):
            tiff_header = tiff_header_for_ccitt(
                int(imgprops.Width), int(imgprops.Height), int(imgprops.Length), 4
@ -7180,16 +7146,6 @@ def test_general(general_input, engine):
        pass


-def test_return_engine_doc(tmp_path_factory):
-    inputf = os.path.join(os.path.dirname(__file__), "tests", "input", "normal.jpg")
-    outputf = tmp_path_factory.mktemp("return_engine_doc") / "normal.jpg.pdf"
-    pdf_wrapper = img2pdf.convert_to_docobject(inputf, engine=img2pdf.Engine.pikepdf)
-    pdf = pdf_wrapper.writer
-    assert isinstance(pdf, pikepdf.Pdf)
-    pdf.save(outputf, min_version=pdf_wrapper.output_version, linearize=True)
-    assert os.path.isfile(outputf)
-
-
 def main():
    normal16 = alpha_value()[:, :, 0:3]
    pathlib.Path("test.icc").write_bytes(icc_profile())
--- a/src/tests/input/mono.jb2
+++ b/src/tests/input/mono.jb2
--- a/src/tests/output/mono.jb2.pdf
+++ b/src/tests/output/mono.jb2.pdf