From 5a7dbc10c74167c8f0105f5803b81317cf676f42 Mon Sep 17 00:00:00 2001
From: Josh Triplett
Date: Thu, 31 Dec 2020 12:49:16 -0800
Subject: [PATCH] Optimize mmtarfilter to handle many path exclusions

mmtarfilter uses fnmatch to handle path exclusions and inclusions.
Python's fnmatch handles shell patterns by translating them to regular
expressions, with a 256-entry LRU cache. With more than 256 path
exclusions or inclusions, this LRU cache no longer works, and every
invocation of fnmatch on every file in every package will re-translate
and re-compile a regular expression, resulting in much worse
performance.

Translate all the shell patterns to regular expressions once.

For an mmdebstrap invocation with around 500 path filters, this speeds
up mmdebstrap by more than a minute.
---
 tarfilter | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tarfilter b/tarfilter
index 838e4f5..ab76683 100755
--- a/tarfilter
+++ b/tarfilter
@@ -21,14 +21,15 @@
 import tarfile
 import sys
 import argparse
-from fnmatch import fnmatch
+import fnmatch
 import re
 
 
 class FilterAction(argparse.Action):
     def __call__(self, parser, namespace, values, option_string=None):
         items = getattr(namespace, "filter", [])
-        items.append((self.dest, values))
+        regex = re.compile(fnmatch.translate(values))
+        items.append((self.dest, regex))
         setattr(namespace, "filter", items)
 
 
@@ -64,17 +65,17 @@ dpkg(1) for information on how these two options work in detail.
         skip = False
         if not args.filter:
             return False
-        for (t, f) in args.filter:
-            if fnmatch(member.name[1:], f):
+        for (t, r) in args.filter:
+            if r.match(member.name[1:]) is not None:
                 if t == "path_include":
                     skip = False
                 else:
                     skip = True
         if skip and (member.isdir() or member.issym()):
-            for (t, f) in args.filter:
+            for (t, r) in args.filter:
                 if t != "path_include":
                     continue
-                prefix = re.sub(r"^([^*?[\\]*).*", r"\1", f)
+                prefix = re.sub(r"^([^*?[\\]*).*", r"\1", r.pattern)
                 prefix = prefix.rstrip("/")
                 if member.name[1:].startswith(prefix):
                     if member.name == "./usr/share/doc/doc-debian":