diff --git a/mmdebstrap b/mmdebstrap
index 0b826f1..ef48fdb 100755
--- a/mmdebstrap
+++ b/mmdebstrap
@@ -42,6 +42,7 @@ use Carp;
 use Term::ANSIColor;
 use Socket;
 use Time::HiRes;
+use Math::BigInt;
 use version;

 ## no critic (InputOutput::RequireBriefOpen)
@@ -175,6 +176,34 @@ sub error {
     }
 }

+# The encoding of dev_t is MMMM Mmmm mmmM MMmm, where M is a hex digit of
+# the major number and m is a hex digit of the minor number.
+#
+# Extract the major number from a device identifier like (stat(...))[6].
+# Returns a Math::BigInt object (the callers compare it with ==).
+sub major {
+    my $rdev = shift;
+    # bits 8..19 of dev_t are the low 12 bits of the major number
+    my $right
+      = Math::BigInt->from_hex("0x00000000000fff00")->band($rdev)->brsft(8);
+    # bits 44..63 of dev_t are the high 20 bits of the major number
+    my $left
+      = Math::BigInt->from_hex("0xfffff00000000000")->band($rdev)->brsft(32);
+    return $right->bior($left);
+}
+
+# Extract the minor number from a device identifier like (stat(...))[6].
+# Returns a Math::BigInt object (the callers compare it with ==).
+sub minor {
+    my $rdev = shift;
+    # bits 0..7 of dev_t are the low 8 bits of the minor number
+    my $right = Math::BigInt->from_hex("0x00000000000000ff")->band($rdev);
+    # bits 20..43 of dev_t are the high 24 bits of the minor number
+    my $left
+      = Math::BigInt->from_hex("0x00000ffffff00000")->band($rdev)->brsft(12);
+    return $right->bior($left);
+}
+
 # check whether a directory is mounted by comparing the device number of the
 # directory itself with its parent
 sub is_mountpoint {
@@ -5149,7 +5178,24 @@ sub main() {

     # figure out the right format
     if ($format eq 'auto') {
-        if ($options->{target} eq '/dev/null') {
+        # (stat(...))[6] is the device identifier which contains the major and
+        # minor numbers for character special files
+        # major 1 and minor 3 is /dev/null on Linux
+        if (    $options->{target} eq '/dev/null'
+            and $OSNAME eq 'linux'
+            and -c '/dev/null'
+            and major((stat("/dev/null"))[6]) == 1
+            and minor((stat("/dev/null"))[6]) == 3) {
+            $format = 'null';
+        } elsif ($options->{target} eq '-'
+            and $OSNAME eq 'linux'
+            and -c STDOUT
+            and major((stat(STDOUT))[6]) == 1
+            and minor((stat(STDOUT))[6]) == 3) {
+            # by checking the major and minor number of the STDOUT fd we also
+            # can detect redirections to /dev/null and choose the null format
+            # accordingly; the -c test keeps block devices with the same
+            # numbers (block major 1 is the ramdisk driver) from matching
             $format = 'null';
         } elsif ($options->{target} ne '-' and -d $options->{target}) {
             $format = 'directory';
@@ -5226,6 +5272,11 @@ sub main() {
             error "the $format format is unable to write to
standard output"; } + if ($format eq 'null' + and none { $_ eq $options->{target} } ('-', '/dev/null')) { + info "ignoring target $options->{target} with null format"; + } + if (any { $_ eq $format } ('tar', 'squashfs', 'ext2', 'null')) { if ($format ne 'null') { if ( any { $_ eq $options->{variant} } ('extract', 'custom') @@ -5423,7 +5463,8 @@ sub main() { ); # tar2sqfs and genext2fs do not support extended attributes if ($format eq "squashfs") { - warning "tar2sqfs does not support extended attributes"; + warning + "disabling extended attributes because tar2sqfs only supports some"; } elsif ($format eq "ext2") { warning "genext2fs does not support extended attributes"; } else { @@ -5777,9 +5818,9 @@ sub main() { # change signal handler message $waiting_for = "cleanup"; - if (any { $_ eq $format } ('directory', 'null')) { + if (any { $_ eq $format } ('directory')) { # nothing to do - } elsif (any { $_ eq $format } ('tar', 'squashfs', 'ext2')) { + } elsif (any { $_ eq $format } ('tar', 'squashfs', 'ext2', 'null')) { if (!-e $options->{root}) { error "$options->{root} does not exist"; } @@ -6405,16 +6446,17 @@ Without that option the default format is I. The following formats exist: When selecting this format (the default), the actual format will be inferred from the I positional argument. If I was not specified, then -the B format will be chosen. If I happens to be F, then -the B format will be chosen. If I is an existing directory, and -does not equal to C<->, then the B format will be chosen. If -I ends with C<.tar> or with one of the filename extensions listed in -the section B, or if I equals C<->, or if I is a -named pipe (fifo) or if I is a character special file like -F, then the B format will be chosen. If I ends with -C<.squashfs> or C<.sqfs>, then the B format will be chosen. If - ends with C<.ext2> then the B format will be chosen. If none of -these conditions apply, the B format will be chosen. +the B format will be chosen. 
If I<TARGET> happens to be F</dev/null> or if
+standard output is F</dev/null>, then the B<null> format will be chosen. If
+I<TARGET> is an existing directory, and does not equal C<->, then the
+B<directory> format will be chosen. If I<TARGET> ends with C<.tar> or with one
+of the filename extensions listed in the section B<COMPRESSION>, or if
+I<TARGET> equals C<->, or if I<TARGET> is a named pipe (fifo) or if I<TARGET>
+is a character special file, then the B<tar> format will be chosen. If
+I<TARGET> ends with C<.squashfs> or C<.sqfs>, then the B<squashfs> format will
+be chosen. If I<TARGET> ends with C<.ext2> then the B<ext2> format will be
+chosen. If none of these conditions apply, the B<directory> format will be
+chosen.

 =item B<directory>, B<dir>

@@ -6446,8 +6488,11 @@ C<tar2sqfs> utility, which will create an xz compressed squashfs image with a
 blocksize of 1048576 bytes in I<TARGET>. The special I<TARGET> C<-> does not
 work with this format because C<tar2sqfs> can only write to a regular file. If
 you need your squashfs image be named C<->, then just explicitly pass the
-relative path to it like F<./->. Since C<tar2sqfs> does not support extended
-attributes, the resulting image will not contain them.
+relative path to it like F<./->. The C<tar2sqfs> tool only supports a limited
+set of extended attribute prefixes. Therefore, extended attributes are disabled
+in the resulting image. If you need them, create a tarball first and remove the
+unsupported extended attributes from its pax headers. Refer to the B<EXAMPLES>
+section for how to achieve this.

 =item B<ext2>

@@ -6468,7 +6513,8 @@ A temporary chroot directory will be created in C<$TMPDIR> or F</tmp> if
 C<$TMPDIR> is not set. After the bootstrap is complete, the temporary chroot
 will be deleted without being part of the output. This is most useful when the
 desired artifact is generated inside the chroot and it is transferred using
-special hooks such as B<sync-out>.
+special hooks such as B<sync-out>. It is also useful in situations where only
+the exit code or stdout or stderr of a process run in a hook is of interest.

=back @@ -6725,7 +6771,16 @@ Instead of a tarball, a squashfs image can be created: By default, B runs B with C<--no-skip --exportable --compressor xz --block-size 1048576>. To choose a different set of options, -pipe the output of B into B manually. +and to filter out all extended attributes not supported by B, pipe +the output of B into B manually like so: + + $ mmdebstrap unstable \ + | mmtarfilter --pax-exclude='*' \ + --pax-include='SCHILY.xattr.user.*' \ + --pax-include='SCHILY.xattr.trusted.*' \ + --pax-include='SCHILY.xattr.security.*' \ + | tar2sqfs --quiet --no-skip --force --exportable --compressor xz \ + --block-size 1048576 unstable-chroot.squashfs By default, debootstrapping a stable distribution will add mirrors for security and updates to the sources.list. diff --git a/tarfilter b/tarfilter index 1b15dff..6c1c766 100755 --- a/tarfilter +++ b/tarfilter @@ -25,12 +25,20 @@ import fnmatch import re -class FilterAction(argparse.Action): +class PathFilterAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): - items = getattr(namespace, "filter", []) + items = getattr(namespace, "pathfilter", []) regex = re.compile(fnmatch.translate(values)) items.append((self.dest, regex)) - setattr(namespace, "filter", items) + setattr(namespace, "pathfilter", items) + + +class PaxFilterAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + items = getattr(namespace, "paxfilter", []) + regex = re.compile(fnmatch.translate(values)) + items.append((self.dest, regex)) + setattr(namespace, "paxfilter", items) def main(): @@ -39,22 +47,46 @@ def main(): Filters a tarball on standard input by the same rules as the dpkg --path-exclude and --path-include options and writes resulting tarball to standard output. See dpkg(1) for information on how these two options work in detail. + +Similarly, filter out unwanted pax extended headers. 
This is useful in cases
+where a tool only accepts certain xattr prefixes. For example tar2sqfs only
+supports SCHILY.xattr.user.*, SCHILY.xattr.trusted.* and
+SCHILY.xattr.security.* but not SCHILY.xattr.system.posix_acl_default.*.
+
+Both types of options use Unix shell-style wildcards:
+
+    *       matches everything
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any character not in seq
 """
     )
     parser.add_argument(
         "--path-exclude",
         metavar="pattern",
-        action=FilterAction,
+        action=PathFilterAction,
         help="Exclude path matching the given shell pattern.",
     )
     parser.add_argument(
         "--path-include",
         metavar="pattern",
-        action=FilterAction,
+        action=PathFilterAction,
         help="Re-include a pattern after a previous exclusion.",
     )
+    parser.add_argument(
+        "--pax-exclude",
+        metavar="pattern",
+        action=PaxFilterAction,
+        help="Exclude pax header matching the given globbing pattern.",
+    )
+    parser.add_argument(
+        "--pax-include",
+        metavar="pattern",
+        action=PaxFilterAction,
+        help="Re-include a pax header after a previous exclusion.",
+    )
     args = parser.parse_args()
-    if not hasattr(args, "filter"):
+    if not hasattr(args, "pathfilter") and not hasattr(args, "paxfilter"):
         from shutil import copyfileobj

         copyfileobj(sys.stdin.buffer, sys.stdout.buffer)
@@ -63,36 +95,50 @@ dpkg(1) for information on how these two options work in detail.
     # same logic as in dpkg/src/filters.c/filter_should_skip()
     prefix_prog = re.compile(r"^([^*?[\\]*).*")

-    def filter_should_skip(member):
+    def path_filter_should_skip(member):
         skip = False
-        if not args.filter:
+        if not hasattr(args, "pathfilter"):
             return False
-        for (t, r) in args.filter:
+        for (t, r) in args.pathfilter:
             if r.match(member.name[1:]) is not None:
                 if t == "path_include":
                     skip = False
                 else:
                     skip = True
         if skip and (member.isdir() or member.issym()):
-            for (t, r) in args.filter:
+            for (t, r) in args.pathfilter:
                 if t != "path_include":
                     continue
                 prefix = prefix_prog.sub(r"\1", r.pattern)
                 prefix = prefix.rstrip("/")
                 if member.name[1:].startswith(prefix):
-                    if member.name == "./usr/share/doc/doc-debian":
-                        print("foo", prefix, "bar", file=sys.stderr)
                     return False
         return skip

+    def pax_filter_should_skip(header):
+        """Tell whether the pax header keyword should be dropped.
+
+        Every matching rule overrides the verdict of the rules before it.
+        """
+        skip = False
+        for (action, regex) in getattr(args, "paxfilter", []):
+            if regex.match(header) is not None:
+                skip = action != "pax_include"
+        return skip
+
     # starting with Python 3.8, the default format became PAX_FORMAT, so this
     # is only for compatibility with older versions of Python 3
     with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
         fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT
     ) as out_tar:
         for member in in_tar:
-            if filter_should_skip(member):
+            if path_filter_should_skip(member):
                 continue
+            member.pax_headers = {
+                k: v
+                for k, v in member.pax_headers.items()
+                if not pax_filter_should_skip(k)
+            }
             if member.isfile():
                 with in_tar.extractfile(member) as file:
                     out_tar.addfile(member, file)