Slightly simplify the getexif procedure #202

Open
mara0004 wants to merge 2 commits from mara0004/img2pdf:getexif into main

View file

@ -22,7 +22,7 @@ import sys
import os
import zlib
import argparse
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms, ExifTags
if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@ -35,7 +35,6 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
)
# TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS
from datetime import datetime, timezone
import jp2
from enum import Enum
@ -1372,41 +1371,42 @@ def get_imgmetadata(
rotation = 0
if rotreq in (None, Rotation.auto, Rotation.ifvalid):
if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
Review

Checks like these for _getexif are there for a reason. Could you try to find out what changed that makes them no longer necessary, and since when that has been the case?

Checks like these for `_getexif` are there for a reason. Could you try to find out what changed that makes them no longer necessary, and since when that has been the case?
Review

getexif() (without underscore) is available since Pillow 6.0.0, that is more than 5 years ago.
Is requiring that acceptable to you?

Otherwise, I could add a fallback to _getexif(), provided that also returns a subscriptable dict which can be treated the same way as getexif().
But IMHO there's not much point in supporting far outdated versions of Pillow, as that makes the code harder to read.

`getexif()` (without underscore) is available since Pillow [6.0.0](https://pillow.readthedocs.io/en/stable/releasenotes/6.0.0.html#added-exif-class), that is more than 5 years ago. Is requiring that acceptable to you? Otherwise, I could add a fallback to `_getexif()`, provided that also returns a subscriptable dict which can be treated the same way as `getexif()`. But IMHO there's not much point in supporting far outdated versions of Pillow, as that makes the code harder to read.
Review

It makes the code harder to read, yes. But the advantage is that, this way, recent versions of img2pdf keep working on older platforms, and distributions that ship Pillow before version 6 are still receiving support and security updates. I do not think it's smart to break img2pdf on a platform just to have 5 fewer lines of code. I mean, imagine somebody from a library, archive or school (which, at least where I'm from, are known to update rarely) reports a bug that img2pdf broke for them. Will you explain to them that this is because we wanted to have a few fewer lines of code?

It makes the code harder to read, yes. But the advantage is that, this way, recent versions of img2pdf keep working on older platforms, and distributions that ship Pillow before version 6 are still receiving support and security updates. I do not think it's smart to break img2pdf on a platform just to have 5 fewer lines of code. I mean, imagine somebody from a library, archive or school (which, at least where I'm from, are known to update rarely) reports a bug that img2pdf broke for them. Will you explain to them that this is because we wanted to have a few fewer lines of code?
Review

Indeed, I see Debian Buster is still on Pillow 5.4 :/
I understand your BW compat concerns and respect your decision, but would not do it that way in my own projects.

IMHO, if you are on a legacy base system, you should be prepared to accept that newer software may not be easily installable. If the institutions you mention do not update the base system, why should they update img2pdf? Sure, new versions provide fixes and add features. But that applies to many, many packages throughout the whole system, so if you care about these improvements, you should upgrade your distribution.
I think it's legitimate to break compatibility with legacy versions of dependencies at some point and ask people to upgrade, and many (most?) other projects are doing so.
Sure, one specific case may not be relevant, but if you removed all workarounds for older versions, I bet it'll be more than 5 lines ;)

Anyway, I will look into adding the fallback.

Indeed, I see Debian Buster is still on Pillow 5.4 :/ I understand your BW compat concerns and respect your decision, but would not do it that way in my own projects. IMHO, if you are on a legacy base system, you should be prepared to accept that newer software may not be easily installable. If the institutions you mention do not update the base system, why should they update img2pdf? Sure, new versions provide fixes and add features. But that applies to many, many packages throughout the whole system, so if you care about these improvements, you should upgrade your distribution. I think it's legitimate to break compatibility with legacy versions of dependencies at some point and ask people to upgrade, and many (most?) other projects are doing so. Sure, one specific case may not be relevant, but if you removed all workarounds for older versions, I bet it'll be more than 5 lines ;) Anyway, I will look into adding the fallback.
for tag, value in imgdata._getexif().items():
if TAGS.get(tag, tag) == "Orientation":
# Detailed information on EXIF rotation tags:
# http://impulseadventure.com/photo/exif-orientation.html
if value == 1:
rotation = 0
elif value == 6:
rotation = 90
elif value == 3:
rotation = 180
elif value == 8:
rotation = 270
elif value in (2, 4, 5, 7):
if rotreq == Rotation.ifvalid:
logger.warning(
"Unsupported flipped rotation mode (%d): use "
"--rotation=ifvalid or "
"rotation=img2pdf.Rotation.ifvalid to ignore",
value,
)
else:
raise ExifOrientationError(
"Unsupported flipped rotation mode (%d): use "
"--rotation=ifvalid or "
"rotation=img2pdf.Rotation.ifvalid to ignore" % value
)
else:
if rotreq == Rotation.ifvalid:
logger.warning("Invalid rotation (%d)", value)
else:
raise ExifOrientationError(
"Invalid rotation (%d): use --rotation=ifvalid "
"or rotation=img2pdf.Rotation.ifvalid to ignore" % value
)
exif_dict = imgdata.getexif()
o_key = ExifTags.Base.Orientation.value # 274 rsp. 0x112
if exif_dict and o_key in exif_dict:
# Detailed information on EXIF rotation tags:
# http://impulseadventure.com/photo/exif-orientation.html
value = exif_dict[o_key]
if value == 1:
rotation = 0
elif value == 6:
rotation = 90
elif value == 3:
rotation = 180
elif value == 8:
rotation = 270
elif value in (2, 4, 5, 7):
if rotreq == Rotation.ifvalid:
Review

BTW, I think you could theoretically support flipped orientation modes using content stream matrices.
Negative width should give horizontal flip (X inversion), and negative height vertical flip (Y inversion).
Assuming /Rotate is clockwise and applied after the matrix, I believe the procedures would be:

2: H, 0
4: V, 0
5: V, 90
7: V, 270

See also Adam M. Costello's illustration in http://sylvana.net/jpegcrop/exif_orientation.html

BTW, I think you could theoretically support flipped orientation modes using [content stream matrices](https://gitlab.mister-muffin.de/josch/img2pdf/src/commit/819b366bf5751e475b32b8f4670fed819fb5e7af/src/img2pdf.py#L994). Negative width should give horizontal flip (X inversion), and negative height vertical flip (Y inversion). Assuming `/Rotate` is clockwise and applied after the matrix, I believe the procedures would be: ```python 2: H, 0 4: V, 0 5: V, 90 7: V, 270 ``` See also Adam M. Costello's illustration in http://sylvana.net/jpegcrop/exif_orientation.html
logger.warning(
"Unsupported flipped rotation mode (%d): use "
"--rotation=ifvalid or "
"rotation=img2pdf.Rotation.ifvalid to ignore",
value,
)
else:
raise ExifOrientationError(
"Unsupported flipped rotation mode (%d): use "
"--rotation=ifvalid or "
"rotation=img2pdf.Rotation.ifvalid to ignore" % value
)
else:
if rotreq == Rotation.ifvalid:
logger.warning("Invalid rotation (%d)", value)
else:
raise ExifOrientationError(
"Invalid rotation (%d): use --rotation=ifvalid "
"or rotation=img2pdf.Rotation.ifvalid to ignore" % value
)
elif rotreq in (Rotation.none, Rotation["0"]):
rotation = 0
elif rotreq == Rotation["90"]: