Add Windows support for paths containing wildcards "*" and "?"

Img2pdf accepts paths containing wildcards such as *.jpg to efficiently refer
to multiple input files that match the wildcard expression. Under POSIX
environments the command line shell is expected to perform this expansion,
however Windows requires the command line utility itself to expand the
wildcard expression.

Ideally this would be performed by argparse as described in this draft PEP:
https://mail.python.org/pipermail/python-ideas/2015-August/035244.html

Since argparse doesn't do it, this commit performs expansion directly.

Some implementation notes:
 - Wildcard characters "*" and "?" are not valid in Windows filenames
 - The code doesn't support bracket wildcards such as [0-3] on Windows,
   since "[" and "]" are valid filename characters there
 - Due to expansion, the images list collected by argparse may contain
   sub-lists. Code uses chain.from_iterable to create a flat list.
 - Paths that refer to non-existent files raise an error, while
   wildcards that match no files are silently ignored.
This commit is contained in:
Paul Ingemi 2020-09-23 12:39:06 -04:00 committed by Johannes 'josch' Schauer
parent 32b4ed1f43
commit 505344f83e
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1

View file

@ -34,6 +34,7 @@ import logging
import struct import struct
import platform import platform
import hashlib import hashlib
from itertools import chain
have_pdfrw = True have_pdfrw = True
try: try:
@ -2265,13 +2266,22 @@ def parse_borderarg(string):
return h, v return h, v
def input_images(path): def input_images(path_expr):
if path == "-": if path_expr == "-":
# we slurp in all data from stdin because we need to seek in it later # we slurp in all data from stdin because we need to seek in it later
result = sys.stdin.buffer.read() result = sys.stdin.buffer.read()
if len(result) == 0: if len(result) == 0:
raise argparse.ArgumentTypeError('"%s" is empty' % path) raise argparse.ArgumentTypeError('"%s" is empty' % path_expr)
else: else:
result = []
paths = [path_expr]
if sys.platform == "win32" and ("*" in path_expr or "?" in path_expr):
# on windows, program is responsible for expanding wildcards such as *.jpg
# glob won't return files that don't exist so we only use it for wildcards
# paths without wildcards that do not exist will trigger "does not exist"
from glob import glob
paths = glob(path_expr)
for path in paths:
try: try:
if os.path.getsize(path) == 0: if os.path.getsize(path) == 0:
raise argparse.ArgumentTypeError('"%s" is empty' % path) raise argparse.ArgumentTypeError('"%s" is empty' % path)
@ -2285,7 +2295,7 @@ def input_images(path):
raise argparse.ArgumentTypeError('"%s" permission denied' % path) raise argparse.ArgumentTypeError('"%s" permission denied' % path)
except FileNotFoundError: except FileNotFoundError:
raise argparse.ArgumentTypeError('"%s" does not exist' % path) raise argparse.ArgumentTypeError('"%s" does not exist' % path)
result = path result.append(path)
return result return result
@ -3630,7 +3640,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
try: try:
convert( convert(
*args.images, *chain.from_iterable(args.images),
engine=args.engine, engine=args.engine,
title=args.title, title=args.title,
author=args.author, author=args.author,