tarfilter: add --type-exclude option

This commit is contained in:
Johannes Schauer Marin Rodrigues 2023-10-23 10:26:47 +02:00
parent 21366f76b7
commit 199e577757
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1

View file

@ -43,6 +43,29 @@ class PaxFilterAction(argparse.Action):
setattr(namespace, "paxfilter", items) setattr(namespace, "paxfilter", items)
class TypeFilterAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
items = getattr(namespace, "typefilter", [])
match values:
case "REGTYPE" | "0":
items.append(tarfile.REGTYPE)
case "LNKTYPE" | "1":
items.append(tarfile.LNKTYPE)
case "SYMTYPE" | "2":
items.append(tarfile.SYMTYPE)
case "CHRTYPE" | "3":
items.append(tarfile.CHRTYPE)
case "BLKTYPE" | "4":
items.append(tarfile.BLKTYPE)
case "DIRTYPE" | "5":
items.append(tarfile.DIRTYPE)
case "FIFOTYPE" | "6":
items.append(tarfile.FIFOTYPE)
case _:
raise ValueError("invalid type: %s" % values)
setattr(namespace, "typefilter", items)
class TransformAction(argparse.Action): class TransformAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None): def __call__(self, parser, namespace, values, option_string=None):
items = getattr(namespace, "trans", []) items = getattr(namespace, "trans", [])
@ -89,10 +112,11 @@ dpkg(1) for information on how these two options work in detail. To reuse the
exact same semantics as used by dpkg, paths must be given as /path and not as exact same semantics as used by dpkg, paths must be given as /path and not as
./path even though they might be stored as such in the tarball. ./path even though they might be stored as such in the tarball.
Secondly, filter out unwanted pax extended headers. This is useful in cases Secondly, filter out unwanted pax extended headers using --pax-exclude and
where a tool only accepts certain xattr prefixes. For example tar2sqfs only --pax-include. This is useful in cases where a tool only accepts certain xattr
supports SCHILY.xattr.user.*, SCHILY.xattr.trusted.* and prefixes. For example tar2sqfs only supports SCHILY.xattr.user.*,
SCHILY.xattr.security.* but not SCHILY.xattr.system.posix_acl_default.*. SCHILY.xattr.trusted.* and SCHILY.xattr.security.* but not
SCHILY.xattr.system.posix_acl_default.*.
Both types of options use Unix shell-style wildcards: Both types of options use Unix shell-style wildcards:
@ -101,10 +125,16 @@ Both types of options use Unix shell-style wildcards:
[seq] matches any character in seq [seq] matches any character in seq
[!seq] matches any character not in seq [!seq] matches any character not in seq
Thirdly, transform the path of tar members using a sed expression just as with Thirdly, filter out files matching a specific tar archive member type using
--type-exclude. Valid type names are REGTYPE (regular file), LNKTYPE
(hardlink), SYMTYPE (symlink), CHRTYPE (character special), BLKTYPE (block
special), DIRTYPE (directory), FIFOTYPE (fifo) or their tar format flag value
(0-6, respectively).
Fourthly, transform the path of tar members using a sed expression just as with
GNU tar --transform. GNU tar --transform.
Fourthly, strip leading directory components off of tar members. Just as with Fifthly, strip leading directory components off of tar members. Just as with
GNU tar --strip-components, tar members that have less or equal components in GNU tar --strip-components, tar members that have less or equal components in
their path are not passed through. their path are not passed through.
@ -140,6 +170,15 @@ Lastly, shift user id and group id of each entry by the value given by the
help="Re-include a pax header after a previous exclusion. " help="Re-include a pax header after a previous exclusion. "
"This option can be specified multiple times.", "This option can be specified multiple times.",
) )
parser.add_argument(
"--type-exclude",
metavar="type",
action=TypeFilterAction,
help="Exclude certain member types by their type. Choose types either "
"by their name (REGTYPE, LNKTYPE, SYMTYPE, CHRTYPE, BLKTYPE, DIRTYPE, "
"FIFOTYPE) or by their tar format flag values (0-6, respectively). "
"This option can be specified multiple times.",
)
parser.add_argument( parser.add_argument(
"--transform", "--transform",
"--xform", "--xform",
@ -164,6 +203,7 @@ Lastly, shift user id and group id of each entry by the value given by the
if ( if (
not hasattr(args, "pathfilter") not hasattr(args, "pathfilter")
and not hasattr(args, "paxfilter") and not hasattr(args, "paxfilter")
and not hasattr(args, "typefilter")
and not hasattr(args, "strip_components") and not hasattr(args, "strip_components")
): ):
from shutil import copyfileobj from shutil import copyfileobj
@ -207,6 +247,14 @@ Lastly, shift user id and group id of each entry by the value given by the
skip = True skip = True
return skip return skip
def type_filter_should_skip(member):
if not hasattr(args, "typefilter"):
return False
for t in args.typefilter:
if member.type == t:
return True
return False
# starting with Python 3.8, the default format became PAX_FORMAT but we # starting with Python 3.8, the default format became PAX_FORMAT but we
# are still explicit here in case of future changes. # are still explicit here in case of future changes.
with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open( with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
@ -215,6 +263,8 @@ Lastly, shift user id and group id of each entry by the value given by the
for member in in_tar: for member in in_tar:
if path_filter_should_skip(member): if path_filter_should_skip(member):
continue continue
if type_filter_should_skip(member):
continue
if args.strip_components: if args.strip_components:
comps = member.name.split("/") comps = member.name.split("/")
# just as with GNU tar, archive members with less or equal # just as with GNU tar, archive members with less or equal