From 465c0564345b456c41016abc6a4b1cb727125961 Mon Sep 17 00:00:00 2001
From: Johannes 'josch' Schauer
Date: Tue, 25 Aug 2020 13:02:33 +0200
Subject: [PATCH] no longer needs to install twice when
 --dpkgopt=path-exclude is given by filtering the tarball with new tarfilter
 utility

---
Review notes (this section is ignored by git am):
 * mmdebstrap: use chomp instead of chop so that a last config line without
   a trailing newline does not lose the final character of its pattern
 * mmdebstrap: report a failing exec of the tarfilter instead of letting the
   forked child fall through into the parent's code path
 * tarfilter: drop leftover debugging output for doc-debian and return from
   main() instead of calling the site-provided exit()
 * hunk headers and diffstat were adjusted for the above; the blob ids on
   the index lines of mmdebstrap and tarfilter still name the pre-review
   content
 * line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch; use "git apply --ignore-whitespace" should the
   context lines not match the tree exactly

 coverage.sh |  11 +++--
 mmdebstrap  | 115 +++++++++++++++++++++++++++++++++++----------------
 tarfilter   |  93 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 40 deletions(-)
 create mode 100755 tarfilter

diff --git a/coverage.sh b/coverage.sh
index 7c21522..9c76e72 100755
--- a/coverage.sh
+++ b/coverage.sh
@@ -20,7 +20,7 @@ fi
 
 perlcritic --severity 4 --verbose 8 mmdebstrap
 
-black --check taridshift
+black --check taridshift tarfilter
 
 mirrordir="./shared/cache/debian"
 
@@ -70,6 +70,9 @@ fi
 if [ ! -e shared/taridshift ] || [ taridshift -nt shared/taridshift ]; then
 	cp -a taridshift shared
 fi
+if [ ! -e shared/tarfilter ] || [ tarfilter -nt shared/tarfilter ]; then
+	cp -a tarfilter shared
+fi
 
 starttime=
 total=149
@@ -1727,11 +1730,11 @@ cat << END > shared/test.sh
 set -eu
 export LC_ALL=C.UTF-8
 echo no-pager > /tmp/config
-$CMD --mode=root --variant=apt --dpkgopt="path-exclude=/usr/share/doc/*" --dpkgopt=/tmp/config $DEFAULT_DIST /tmp/debian-chroot $mirror
-printf 'path-exclude=/usr/share/doc/*\nno-pager\n' | cmp /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap -
+$CMD --mode=root --variant=apt --dpkgopt="path-exclude=/usr/share/doc/*" --dpkgopt=/tmp/config --dpkgopt="path-include=/usr/share/doc/dpkg/copyright" $DEFAULT_DIST /tmp/debian-chroot $mirror
+printf 'path-exclude=/usr/share/doc/*\nno-pager\npath-include=/usr/share/doc/dpkg/copyright\n' | cmp /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap -
 rm /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap
 tar -C /tmp/debian-chroot --one-file-system -c . | tar -t | sort > tar2.txt
-grep -v '^./usr/share/doc/.' tar1.txt | diff -u - tar2.txt
+{ grep -v '^./usr/share/doc/.' tar1.txt; echo ./usr/share/doc/dpkg/; echo ./usr/share/doc/dpkg/copyright; } | sort | diff -u - tar2.txt
 rm -r /tmp/debian-chroot /tmp/config
 END
 if [ "$HAVE_QEMU" = "yes" ]; then
diff --git a/mmdebstrap b/mmdebstrap
index f1745e7..39623be 100755
--- a/mmdebstrap
+++ b/mmdebstrap
@@ -2018,28 +2018,101 @@ sub run_extract() {
     my $total   = scalar @{$essential_pkgs};
     foreach my $deb (@{$essential_pkgs}) {
         $counter += 1;
+
+        my $tarfilter;
+        my @tarfilterargs;
+        # if path-exclude/path-include options are set in the dpkg config,
+        # insert the tarfilter between dpkg-deb and tar
+        if (-e "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap") {
+            open(my $fh, '<',
+                "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap")
+              or error "cannot open /etc/dpkg/dpkg.cfg.d/99mmdebstrap: $!";
+            my @matches = grep { /^path-(?:exclude|include)=/ } <$fh>;
+            close $fh;
+            chomp @matches;    # remove trailing newline, if there is one
+            @tarfilterargs = map { "--" . $_ } @matches;
+        }
+        if (scalar @tarfilterargs > 0) {
+            if (-x "./tarfilter") {
+                $tarfilter = "./tarfilter";
+            } else {
+                $tarfilter = "mmtarfilter";
+            }
+        }
+
+        my $dpkg_writer;
+        my $tar_reader;
+        my $filter_reader;
+        my $filter_writer;
+        if (scalar @tarfilterargs > 0) {
+            pipe $filter_reader, $dpkg_writer or error "pipe failed: $!";
+            pipe $tar_reader, $filter_writer or error "pipe failed: $!";
+        } else {
+            pipe $tar_reader, $dpkg_writer or error "pipe failed: $!";
+        }
         # not using dpkg-deb --extract as that would replace the
         # merged-usr symlinks with plain directories
-        pipe my $rfh, my $wfh;
+        # not using dpkg --unpack because that would try running preinst
+        # maintainer scripts
         my $pid1 = fork() // error "fork() failed: $!";
         if ($pid1 == 0) {
-            open(STDOUT, '>&', $wfh) or error "cannot open STDOUT: $!";
+            open(STDOUT, '>&', $dpkg_writer) or error "cannot open STDOUT: $!";
+            close($tar_reader) or error "cannot close tar_reader: $!";
+            if (scalar @tarfilterargs > 0) {
+                close($filter_reader)
+                  or error "cannot close filter_reader: $!";
+                close($filter_writer)
+                  or error "cannot close filter_writer: $!";
+            }
             debug("running dpkg-deb --fsys-tarfile $options->{root}/$deb");
             eval { Devel::Cover::set_coverage("none") } if $is_covering;
             exec 'dpkg-deb', '--fsys-tarfile', "$options->{root}/$deb";
         }
-        my $pid2 = fork() // error "fork() failed: $!";
-        if ($pid2 == 0) {
-            open(STDIN, '<&', $rfh) or error "cannot open STDIN: $!";
+        my $pid2;
+        if (scalar @tarfilterargs > 0) {
+            $pid2 = fork() // error "fork() failed: $!";
+            if ($pid2 == 0) {
+                open(STDIN, '<&', $filter_reader)
+                  or error "cannot open STDIN: $!";
+                open(STDOUT, '>&', $filter_writer)
+                  or error "cannot open STDOUT: $!";
+                close($dpkg_writer) or error "cannot close dpkg_writer: $!";
+                close($tar_reader) or error "cannot close tar_reader: $!";
+                debug("running $tarfilter " . (join " ", @tarfilterargs));
+                eval { Devel::Cover::set_coverage("none") } if $is_covering;
+                exec $tarfilter, @tarfilterargs
+                  or error "cannot exec $tarfilter: $!";
+            }
+        }
+        my $pid3 = fork() // error "fork() failed: $!";
+        if ($pid3 == 0) {
+            open(STDIN, '<&', $tar_reader) or error "cannot open STDIN: $!";
+            close($dpkg_writer) or error "cannot close dpkg_writer: $!";
+            if (scalar @tarfilterargs > 0) {
+                close($filter_reader)
+                  or error "cannot close filter_reader: $!";
+                close($filter_writer)
+                  or error "cannot close filter_writer: $!";
+            }
             debug(  "running tar -C $options->{root}"
                   . " --keep-directory-symlink --extract --file -");
             eval { Devel::Cover::set_coverage("none") } if $is_covering;
             exec 'tar', '-C', $options->{root},
               '--keep-directory-symlink', '--extract', '--file', '-';
         }
+        close($dpkg_writer) or error "cannot close dpkg_writer: $!";
+        close($tar_reader) or error "cannot close tar_reader: $!";
+        if (scalar @tarfilterargs > 0) {
+            close($filter_reader) or error "cannot close filter_reader: $!";
+            close($filter_writer) or error "cannot close filter_writer: $!";
+        }
         waitpid($pid1, 0);
         $? == 0 or error "dpkg-deb --fsys-tarfile failed: $?";
-        waitpid($pid2, 0);
+        if (scalar @tarfilterargs > 0) {
+            waitpid($pid2, 0);
+            $? == 0 or error "tarfilter failed: $?";
+        }
+        waitpid($pid3, 0);
         $? == 0 or error "tar --extract failed: $?";
         print_progress($counter / $total * 100);
     }
@@ -2311,36 +2384,6 @@ sub run_essential() {
                 $options
             );
         }
-
-        # if the path-excluded option was added to the dpkg config,
-        # reinstall all packages
-        if ((!$options->{dryrun})
-            and -e "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap") {
-            open(my $fh, '<',
-                "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap")
-              or error "cannot open /etc/dpkg/dpkg.cfg.d/99mmdebstrap: $!";
-            my $num_matches = grep { /^path-exclude=/ } <$fh>;
-            close $fh;
-            if ($num_matches > 0) {
-                # without --skip-same-version, dpkg will install the given
-                # packages even though they are already installed
-                info "re-installing packages because of path-exclude...";
-                run_chroot(
-                    sub {
-                        run_dpkg_progress({
-                                ARGV => [
-                                    @{$chrootcmd}, 'env',
-                                    '--unset=TMPDIR', 'dpkg',
-                                    '--install', '--force-depends'
-                                ],
-                                PKGS => $essential_pkgs,
-                        });
-                    },
-                    $options
-                );
-            }
-        }
-
     } else {
         error "unknown mode: $options->{mode}";
     }
diff --git a/tarfilter b/tarfilter
new file mode 100755
index 0000000..691e383
--- /dev/null
+++ b/tarfilter
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+#
+# This script is in the public domain
+#
+# This script accepts a tarball on standard input and filters it according to
+# the same rules used by dpkg --path-exclude and --path-include, using command
+# line options of the same name. The result is then printed on standard output.
+#
+# A tool like this should be written in C but libarchive has issues:
+# https://github.com/libarchive/libarchive/issues/587
+# https://github.com/libarchive/libarchive/pull/1288/ (needs 3.4.1)
+# Should these issues get fixed, then a good template is tarfilter.c in the
+# examples directory of libarchive.
+#
+# We are not using Perl either, because Archive::Tar slurps the whole tarball
+# into memory.
+#
+# We could also use Go but meh...
+# https://stackoverflow.com/a/59542307/784669
+
+import tarfile
+import sys
+import argparse
+from fnmatch import fnmatch
+import re
+
+
+class FilterAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        items = getattr(namespace, "filter", [])
+        items.append((self.dest, values))
+        setattr(namespace, "filter", items)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="filter a tarball")
+    parser.add_argument(
+        "--path-exclude",
+        metavar="pattern",
+        action=FilterAction,
+        help="Exclude path matching the given shell pattern.",
+    )
+    parser.add_argument(
+        "--path-include",
+        metavar="pattern",
+        action=FilterAction,
+        help="Re-include a pattern after a previous exclusion.",
+    )
+    args = parser.parse_args()
+    if not hasattr(args, "filter"):
+        from shutil import copyfileobj
+
+        copyfileobj(sys.stdin.buffer, sys.stdout.buffer)
+        return
+
+    # same logic as in dpkg/src/filters.c/filter_should_skip()
+    def filter_should_skip(member):
+        skip = False
+        if not args.filter:
+            return False
+        for (t, f) in args.filter:
+            if fnmatch(member.name[1:], f):
+                if t == "path_include":
+                    skip = False
+                else:
+                    skip = True
+        if skip and (member.isdir() or member.issym()):
+            for (t, f) in args.filter:
+                if t != "path_include":
+                    continue
+                prefix = re.sub(r"^([^*?[\\]*).*", r"\1", f)
+                prefix = prefix.rstrip("/")
+                if member.name[1:].startswith(prefix):
+                    return False
+        return skip
+
+    # starting with Python 3.8, the default format became PAX_FORMAT, so this
+    # is only for compatibility with older versions of Python 3
+    with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
+        fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT
+    ) as out_tar:
+        for member in in_tar:
+            if filter_should_skip(member):
+                continue
+            if member.isfile():
+                with in_tar.extractfile(member) as file:
+                    out_tar.addfile(member, file)
+            else:
+                out_tar.addfile(member)
+
+
+if __name__ == "__main__":
+    main()