tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user.*, trusted.* and security.*

This commit is contained in:
Johannes Schauer Marin Rodrigues 2021-05-07 09:39:40 +02:00
parent bd5d3c3dab
commit 236b84a486
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1
2 changed files with 78 additions and 15 deletions

View file

@ -5463,7 +5463,8 @@ sub main() {
); );
# tar2sqfs and genext2fs do not support extended attributes # tar2sqfs and genext2fs do not support extended attributes
if ($format eq "squashfs") { if ($format eq "squashfs") {
warning "tar2sqfs does not support extended attributes"; warning
"disabling extended attributes because tar2sqfs only supports some";
} elsif ($format eq "ext2") { } elsif ($format eq "ext2") {
warning "genext2fs does not support extended attributes"; warning "genext2fs does not support extended attributes";
} else { } else {
@ -6487,8 +6488,11 @@ C<tar2sqfs> utility, which will create an xz compressed squashfs image with a
blocksize of 1048576 bytes in I<TARGET>. The special I<TARGET> C<-> does not blocksize of 1048576 bytes in I<TARGET>. The special I<TARGET> C<-> does not
work with this format because C<tar2sqfs> can only write to a regular file. If work with this format because C<tar2sqfs> can only write to a regular file. If
you need your squashfs image to be named C<->, then just explicitly pass the you need your squashfs image to be named C<->, then just explicitly pass the
relative path to it like F<./->. Since C<tar2sqfs> does not support extended relative path to it like F<./->. The C<tar2sqfs> tool only supports a limited
attributes, the resulting image will not contain them. set of extended attribute prefixes. Therefore, extended attributes are disabled
in the resulting image. If you need them, create a tarball first and remove the
unsupported extended attributes from its pax headers. Refer to the B<EXAMPLES> section for
how to achieve this.
=item B<ext2> =item B<ext2>
@ -6767,7 +6771,16 @@ Instead of a tarball, a squashfs image can be created:
By default, B<mmdebstrap> runs B<tar2sqfs> with C<--no-skip --exportable By default, B<mmdebstrap> runs B<tar2sqfs> with C<--no-skip --exportable
--compressor xz --block-size 1048576>. To choose a different set of options, --compressor xz --block-size 1048576>. To choose a different set of options,
pipe the output of B<mmdebstrap> into B<tar2sqfs> manually. and to filter out all extended attributes not supported by B<tar2sqfs>, pipe
the output of B<mmdebstrap> into B<tar2sqfs> manually like so:
$ mmdebstrap unstable \
| mmtarfilter --pax-exclude='*' \
--pax-include='SCHILY.xattr.user.*' \
--pax-include='SCHILY.xattr.trusted.*' \
--pax-include='SCHILY.xattr.security.*' \
| tar2sqfs --quiet --no-skip --force --exportable --compressor xz \
--block-size 1048576 unstable-chroot.squashfs
By default, debootstrapping a stable distribution will add mirrors for security By default, debootstrapping a stable distribution will add mirrors for security
and updates to the sources.list. and updates to the sources.list.

View file

@ -25,12 +25,20 @@ import fnmatch
import re import re
class FilterAction(argparse.Action): class PathFilterAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None): def __call__(self, parser, namespace, values, option_string=None):
items = getattr(namespace, "filter", []) items = getattr(namespace, "pathfilter", [])
regex = re.compile(fnmatch.translate(values)) regex = re.compile(fnmatch.translate(values))
items.append((self.dest, regex)) items.append((self.dest, regex))
setattr(namespace, "filter", items) setattr(namespace, "pathfilter", items)
class PaxFilterAction(argparse.Action):
    """Collect pax header filter rules in the order they are given.

    Each invocation appends a ``(dest, compiled_regex)`` tuple to the
    ``paxfilter`` list stored on the namespace. The attribute is created by
    the first invocation, so its absence means no rule was recorded.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Fetch the rules recorded so far (or start a fresh list).
        rules = getattr(namespace, "paxfilter", [])
        # Turn the shell-style glob into a compiled regular expression once,
        # so matching later does not have to translate it again.
        glob_as_regex = fnmatch.translate(values)
        rule = (self.dest, re.compile(glob_as_regex))
        rules.append(rule)
        namespace.paxfilter = rules
def main(): def main():
@ -39,22 +47,46 @@ def main():
Filters a tarball on standard input by the same rules as the dpkg --path-exclude Filters a tarball on standard input by the same rules as the dpkg --path-exclude
and --path-include options and writes resulting tarball to standard output. See and --path-include options and writes resulting tarball to standard output. See
dpkg(1) for information on how these two options work in detail. dpkg(1) for information on how these two options work in detail.
Similarly, filter out unwanted pax extended headers. This is useful in cases
where a tool only accepts certain xattr prefixes. For example tar2sqfs only
supports SCHILY.xattr.user.*, SCHILY.xattr.trusted.* and
SCHILY.xattr.security.* but not SCHILY.xattr.system.posix_acl_default.*.
Both types of options use Unix shell-style wildcards:
* matches everything
? matches any single character
[seq] matches any character in seq
[!seq] matches any character not in seq
""" """
) )
parser.add_argument( parser.add_argument(
"--path-exclude", "--path-exclude",
metavar="pattern", metavar="pattern",
action=FilterAction, action=PathFilterAction,
help="Exclude path matching the given shell pattern.", help="Exclude path matching the given shell pattern.",
) )
parser.add_argument( parser.add_argument(
"--path-include", "--path-include",
metavar="pattern", metavar="pattern",
action=FilterAction, action=PathFilterAction,
help="Re-include a pattern after a previous exclusion.", help="Re-include a pattern after a previous exclusion.",
) )
parser.add_argument(
"--pax-exclude",
metavar="pattern",
action=PaxFilterAction,
help="Exclude pax header matching the given globbing pattern.",
)
parser.add_argument(
"--pax-include",
metavar="pattern",
action=PaxFilterAction,
help="Re-include a pax header after a previous exclusion.",
)
args = parser.parse_args() args = parser.parse_args()
if not hasattr(args, "filter"): if not hasattr(args, "pathfilter") and not hasattr(args, "paxfilter"):
from shutil import copyfileobj from shutil import copyfileobj
copyfileobj(sys.stdin.buffer, sys.stdout.buffer) copyfileobj(sys.stdin.buffer, sys.stdout.buffer)
@ -63,18 +95,18 @@ dpkg(1) for information on how these two options work in detail.
# same logic as in dpkg/src/filters.c/filter_should_skip() # same logic as in dpkg/src/filters.c/filter_should_skip()
prefix_prog = re.compile(r"^([^*?[\\]*).*") prefix_prog = re.compile(r"^([^*?[\\]*).*")
def filter_should_skip(member): def path_filter_should_skip(member):
skip = False skip = False
if not args.filter: if not hasattr(args, "pathfilter"):
return False return False
for (t, r) in args.filter: for (t, r) in args.pathfilter:
if r.match(member.name[1:]) is not None: if r.match(member.name[1:]) is not None:
if t == "path_include": if t == "path_include":
skip = False skip = False
else: else:
skip = True skip = True
if skip and (member.isdir() or member.issym()): if skip and (member.isdir() or member.issym()):
for (t, r) in args.filter: for (t, r) in args.pathfilter:
if t != "path_include": if t != "path_include":
continue continue
prefix = prefix_prog.sub(r"\1", r.pattern) prefix = prefix_prog.sub(r"\1", r.pattern)
@ -83,14 +115,32 @@ dpkg(1) for information on how these two options work in detail.
return False return False
return skip return skip
def pax_filter_should_skip(header):
    """Return True if the pax header keyword *header* should be dropped.

    All recorded rules are evaluated in order and the last one whose
    pattern matches decides: a ``pax_include`` rule keeps the header, any
    other rule drops it. Without any recorded rule, or without a matching
    one, the header is kept.
    """
    # The attribute only exists if a pax filter rule was recorded.
    if not hasattr(args, "paxfilter"):
        return False
    drop = False
    for kind, pattern in args.paxfilter:
        if pattern.match(header) is not None:
            # A later matching rule overrides an earlier verdict.
            drop = kind != "pax_include"
    return drop
# starting with Python 3.8, the default format became PAX_FORMAT, so this # starting with Python 3.8, the default format became PAX_FORMAT, so this
# is only for compatibility with older versions of Python 3 # is only for compatibility with older versions of Python 3
with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open( with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT
) as out_tar: ) as out_tar:
for member in in_tar: for member in in_tar:
if filter_should_skip(member): if path_filter_should_skip(member):
continue continue
member.pax_headers = {
k: v
for k, v in member.pax_headers.items()
if not pax_filter_should_skip(k)
}
if member.isfile(): if member.isfile():
with in_tar.extractfile(member) as file: with in_tar.extractfile(member) as file:
out_tar.addfile(member, file) out_tar.addfile(member, file)