Add Windows support for paths containing wildcards "*" and "?"

Img2pdf accepts paths containing wildcards such as *.jpg to efficiently refer to multiple input files that match the wildcard expression. Under POSIX environments the command line shell is expected to perform this expansion; however, Windows requires the command line utility itself to expand the wildcard expression.

Ideally this would be performed by argparse as described in this draft PEP: https://mail.python.org/pipermail/python-ideas/2015-August/035244.html

Since argparse doesn't do it, this commit performs expansion directly.

Some implementation notes:
- Wildcard characters "*" and "?" are not valid in Windows filenames
- Code doesn't support bracket wildcards such as [0-3] on Windows since they are valid filename characters
- Due to expansion, the images list collected by argparse may contain sub-lists. Code uses chain.from_iterable to create a flat list.
- Paths that refer to nonexistent files raise an error message, while wildcards that match no files are silently ignored.
2020-09-23 12:39:06 -04:00 · 2020-09-23 12:39:06 -04:00 · 505344f83e
commit 505344f83e
parent 32b4ed1f43
1 changed files with 28 additions and 18 deletions
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@ -34,6 +34,7 @@ import logging
 import struct
 import platform
 import hashlib
+from itertools import chain

 have_pdfrw = True
 try:
@ -2265,27 +2266,36 @@ def parse_borderarg(string):
    return h, v


-def input_images(path):
-    if path == "-":
+def input_images(path_expr):
+    if path_expr == "-":
        # we slurp in all data from stdin because we need to seek in it later
        result = sys.stdin.buffer.read()
        if len(result) == 0:
-            raise argparse.ArgumentTypeError('"%s" is empty' % path)
+            raise argparse.ArgumentTypeError('"%s" is empty' % path_expr)
    else:
-        try:
-            if os.path.getsize(path) == 0:
-                raise argparse.ArgumentTypeError('"%s" is empty' % path)
-            # test-read a byte from it so that we can abort early in case
-            # we cannot read data from the file
-            with open(path, "rb") as im:
-                im.read(1)
-        except IsADirectoryError:
-            raise argparse.ArgumentTypeError('"%s" is a directory' % path)
-        except PermissionError:
-            raise argparse.ArgumentTypeError('"%s" permission denied' % path)
-        except FileNotFoundError:
-            raise argparse.ArgumentTypeError('"%s" does not exist' % path)
-        result = path
+        result = []
+        paths = [path_expr]
+        if sys.platform == "win32" and ("*" in path_expr or "?" in path_expr):
+            # on windows, program is responsible for expanding wildcards such as *.jpg
+            # glob won't return files that don't exist so we only use it for wildcards
+            # paths without wildcards that do not exist will trigger "does not exist"
+            from glob import glob
+            paths = glob(path_expr)
+        for path in paths:
+            try:
+                if os.path.getsize(path) == 0:
+                    raise argparse.ArgumentTypeError('"%s" is empty' % path)
+                # test-read a byte from it so that we can abort early in case
+                # we cannot read data from the file
+                with open(path, "rb") as im:
+                    im.read(1)
+            except IsADirectoryError:
+                raise argparse.ArgumentTypeError('"%s" is a directory' % path)
+            except PermissionError:
+                raise argparse.ArgumentTypeError('"%s" permission denied' % path)
+            except FileNotFoundError:
+                raise argparse.ArgumentTypeError('"%s" does not exist' % path)
+            result.append(path)
    return result


@ -3630,7 +3640,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.

    try:
        convert(
-            *args.images,
+            *chain.from_iterable(args.images),
            engine=args.engine,
            title=args.title,
            author=args.author,