Add Windows support for paths containing wildcards "*" and "?"

Img2pdf accepts paths containing wildcards such as *.jpg to efficiently refer
to multiple input files that match the wildcard expression. Under POSIX
environments the command-line shell is expected to perform this expansion;
on Windows, however, the command-line utility itself must expand the
wildcard expression.

Ideally this would be performed by argparse as described in this draft PEP:
https://mail.python.org/pipermail/python-ideas/2015-August/035244.html

Since argparse doesn't do it, this commit performs expansion directly.

Some implementation notes:
 - Wildcard characters "*" and "?" are not valid in Windows filenames
 - Code doesn't support bracket wildcards such as [0-3] on Windows since
   "[" and "]" are valid filename characters
 - Due to expansion, the images list collected by argparse may contain
   sub-lists. Code uses chain.from_iterable to create a flat list.
 - Paths that refer to non-existent files raise an error, while
   wildcards that match no files are silently ignored.
This commit is contained in:
Paul Ingemi 2020-09-23 12:39:06 -04:00 committed by Johannes 'josch' Schauer
parent 32b4ed1f43
commit 505344f83e
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1

View file

@ -34,6 +34,7 @@ import logging
import struct import struct
import platform import platform
import hashlib import hashlib
from itertools import chain
have_pdfrw = True have_pdfrw = True
try: try:
@ -2265,27 +2266,36 @@ def parse_borderarg(string):
return h, v return h, v
def input_images(path): def input_images(path_expr):
if path == "-": if path_expr == "-":
# we slurp in all data from stdin because we need to seek in it later # we slurp in all data from stdin because we need to seek in it later
result = sys.stdin.buffer.read() result = sys.stdin.buffer.read()
if len(result) == 0: if len(result) == 0:
raise argparse.ArgumentTypeError('"%s" is empty' % path) raise argparse.ArgumentTypeError('"%s" is empty' % path_expr)
else: else:
try: result = []
if os.path.getsize(path) == 0: paths = [path_expr]
raise argparse.ArgumentTypeError('"%s" is empty' % path) if sys.platform == "win32" and ("*" in path_expr or "?" in path_expr):
# test-read a byte from it so that we can abort early in case # on windows, program is responsible for expanding wildcards such as *.jpg
# we cannot read data from the file # glob won't return files that don't exist so we only use it for wildcards
with open(path, "rb") as im: # paths without wildcards that do not exist will trigger "does not exist"
im.read(1) from glob import glob
except IsADirectoryError: paths = glob(path_expr)
raise argparse.ArgumentTypeError('"%s" is a directory' % path) for path in paths:
except PermissionError: try:
raise argparse.ArgumentTypeError('"%s" permission denied' % path) if os.path.getsize(path) == 0:
except FileNotFoundError: raise argparse.ArgumentTypeError('"%s" is empty' % path)
raise argparse.ArgumentTypeError('"%s" does not exist' % path) # test-read a byte from it so that we can abort early in case
result = path # we cannot read data from the file
with open(path, "rb") as im:
im.read(1)
except IsADirectoryError:
raise argparse.ArgumentTypeError('"%s" is a directory' % path)
except PermissionError:
raise argparse.ArgumentTypeError('"%s" permission denied' % path)
except FileNotFoundError:
raise argparse.ArgumentTypeError('"%s" does not exist' % path)
result.append(path)
return result return result
@ -3630,7 +3640,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
try: try:
convert( convert(
*args.images, *chain.from_iterable(args.images),
engine=args.engine, engine=args.engine,
title=args.title, title=args.title,
author=args.author, author=args.author,