no longer needs to install twice when --dpkgopt=path-exclude is given by filtering the tarball with the new tarfilter utility

This commit is contained in:
Johannes 'josch' Schauer 2020-08-25 13:02:33 +02:00
parent 8f09c3e02f
commit 465c056434
Signed by untrusted user: josch
GPG key ID: F2CBA5C78FBD83E1
3 changed files with 180 additions and 40 deletions

View file

@ -20,7 +20,7 @@ fi
perlcritic --severity 4 --verbose 8 mmdebstrap
black --check taridshift
black --check taridshift tarfilter
mirrordir="./shared/cache/debian"
@ -70,6 +70,9 @@ fi
if [ ! -e shared/taridshift ] || [ taridshift -nt shared/taridshift ]; then
cp -a taridshift shared
fi
if [ ! -e shared/tarfilter ] || [ tarfilter -nt shared/tarfilter ]; then
cp -a tarfilter shared
fi
starttime=
total=149
@ -1727,11 +1730,11 @@ cat << END > shared/test.sh
set -eu
export LC_ALL=C.UTF-8
echo no-pager > /tmp/config
$CMD --mode=root --variant=apt --dpkgopt="path-exclude=/usr/share/doc/*" --dpkgopt=/tmp/config $DEFAULT_DIST /tmp/debian-chroot $mirror
printf 'path-exclude=/usr/share/doc/*\nno-pager\n' | cmp /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap -
$CMD --mode=root --variant=apt --dpkgopt="path-exclude=/usr/share/doc/*" --dpkgopt=/tmp/config --dpkgopt="path-include=/usr/share/doc/dpkg/copyright" $DEFAULT_DIST /tmp/debian-chroot $mirror
printf 'path-exclude=/usr/share/doc/*\nno-pager\npath-include=/usr/share/doc/dpkg/copyright\n' | cmp /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap -
rm /tmp/debian-chroot/etc/dpkg/dpkg.cfg.d/99mmdebstrap
tar -C /tmp/debian-chroot --one-file-system -c . | tar -t | sort > tar2.txt
grep -v '^./usr/share/doc/.' tar1.txt | diff -u - tar2.txt
{ grep -v '^./usr/share/doc/.' tar1.txt; echo ./usr/share/doc/dpkg/; echo ./usr/share/doc/dpkg/copyright; } | sort | diff -u - tar2.txt
rm -r /tmp/debian-chroot /tmp/config
END
if [ "$HAVE_QEMU" = "yes" ]; then

View file

@ -2018,28 +2018,100 @@ sub run_extract() {
my $total = scalar @{$essential_pkgs};
foreach my $deb (@{$essential_pkgs}) {
$counter += 1;
my $tarfilter;
my @tarfilterargs;
# if the path-excluded option was added to the dpkg config,
# insert the tarfilter between dpkg-deb and tar
if (-e "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap") {
open(my $fh, '<',
"$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap")
or error "cannot open /etc/dpkg/dpkg.cfg.d/99mmdebstrap: $!";
my @matches = grep { /^path-(?:exclude|include)=/ } <$fh>;
close $fh;
chop @matches; # remove trailing newline
@tarfilterargs = map { "--" . $_ } @matches;
}
if (scalar @tarfilterargs > 0) {
if (-x "./tarfilter") {
$tarfilter = "./tarfilter";
} else {
$tarfilter = "mmtarfilter";
}
}
my $dpkg_writer;
my $tar_reader;
my $filter_reader;
my $filter_writer;
if (scalar @tarfilterargs > 0) {
pipe $filter_reader, $dpkg_writer or error "pipe failed: $!";
pipe $tar_reader, $filter_writer or error "pipe failed: $!";
} else {
pipe $tar_reader, $dpkg_writer or error "pipe failed: $!";
}
# not using dpkg-deb --extract as that would replace the
# merged-usr symlinks with plain directories
pipe my $rfh, my $wfh;
# not using dpkg --unpack because that would try running preinst
# maintainer scripts
my $pid1 = fork() // error "fork() failed: $!";
if ($pid1 == 0) {
open(STDOUT, '>&', $wfh) or error "cannot open STDOUT: $!";
open(STDOUT, '>&', $dpkg_writer) or error "cannot open STDOUT: $!";
close($tar_reader) or error "cannot close tar_reader: $!";
if (scalar @tarfilterargs > 0) {
close($filter_reader)
or error "cannot close filter_reader: $!";
close($filter_writer)
or error "cannot close filter_writer: $!";
}
debug("running dpkg-deb --fsys-tarfile $options->{root}/$deb");
eval { Devel::Cover::set_coverage("none") } if $is_covering;
exec 'dpkg-deb', '--fsys-tarfile', "$options->{root}/$deb";
}
my $pid2 = fork() // error "fork() failed: $!";
my $pid2;
if (scalar @tarfilterargs > 0) {
$pid2 = fork() // error "fork() failed: $!";
if ($pid2 == 0) {
open(STDIN, '<&', $rfh) or error "cannot open STDIN: $!";
open(STDIN, '<&', $filter_reader)
or error "cannot open STDIN: $!";
open(STDOUT, '>&', $filter_writer)
or error "cannot open STDOUT: $!";
close($dpkg_writer) or error "cannot close dpkg_writer: $!";
close($tar_reader) or error "cannot close tar_reader: $!";
debug("running $tarfilter " . (join " ", @tarfilterargs));
eval { Devel::Cover::set_coverage("none") } if $is_covering;
exec $tarfilter, @tarfilterargs;
}
}
my $pid3 = fork() // error "fork() failed: $!";
if ($pid3 == 0) {
open(STDIN, '<&', $tar_reader) or error "cannot open STDIN: $!";
close($dpkg_writer) or error "cannot close dpkg_writer: $!";
if (scalar @tarfilterargs > 0) {
close($filter_reader)
or error "cannot close filter_reader: $!";
close($filter_writer)
or error "cannot close filter_writer: $!";
}
debug( "running tar -C $options->{root}"
. " --keep-directory-symlink --extract --file -");
eval { Devel::Cover::set_coverage("none") } if $is_covering;
exec 'tar', '-C', $options->{root},
'--keep-directory-symlink', '--extract', '--file', '-';
}
close($dpkg_writer) or error "cannot close dpkg_writer: $!";
close($tar_reader) or error "cannot close tar_reader: $!";
if (scalar @tarfilterargs > 0) {
close($filter_reader) or error "cannot close filter_reader: $!";
close($filter_writer) or error "cannot close filter_writer: $!";
}
waitpid($pid1, 0);
$? == 0 or error "dpkg-deb --fsys-tarfile failed: $?";
if (scalar @tarfilterargs > 0) {
waitpid($pid2, 0);
$? == 0 or error "tarfilter failed: $?";
}
waitpid($pid3, 0);
$? == 0 or error "tar --extract failed: $?";
print_progress($counter / $total * 100);
}
@ -2311,36 +2383,6 @@ sub run_essential() {
$options
);
}
# if the path-excluded option was added to the dpkg config,
# reinstall all packages
if ((!$options->{dryrun})
and -e "$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap") {
open(my $fh, '<',
"$options->{root}/etc/dpkg/dpkg.cfg.d/99mmdebstrap")
or error "cannot open /etc/dpkg/dpkg.cfg.d/99mmdebstrap: $!";
my $num_matches = grep { /^path-exclude=/ } <$fh>;
close $fh;
if ($num_matches > 0) {
# without --skip-same-version, dpkg will install the given
# packages even though they are already installed
info "re-installing packages because of path-exclude...";
run_chroot(
sub {
run_dpkg_progress({
ARGV => [
@{$chrootcmd}, 'env',
'--unset=TMPDIR', 'dpkg',
'--install', '--force-depends'
],
PKGS => $essential_pkgs,
});
},
$options
);
}
}
} else {
error "unknown mode: $options->{mode}";
}

95
tarfilter Executable file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env python3
#
# This script is in the public domain
#
# This script accepts a tarball on standard input and filters it according to
# the same rules used by dpkg --path-exclude and --path-include, using command
# line options of the same name. The result is then printed on standard output.
#
# A tool like this should be written in C but libarchive has issues:
# https://github.com/libarchive/libarchive/issues/587
# https://github.com/libarchive/libarchive/pull/1288/ (needs 3.4.1)
# Should these issues get fixed, then a good template is tarfilter.c in the
# examples directory of libarchive.
#
# We are not using Perl either, because Archive::Tar slurps the whole tarball
# into memory.
#
# We could also use Go but meh...
# https://stackoverflow.com/a/59542307/784669
import tarfile
import sys
import argparse
from fnmatch import fnmatch
import re
class FilterAction(argparse.Action):
    """Collect filter options into one list, preserving command-line order.

    Every use of an option with this action appends a tuple of
    (option destination name, option value) to the ``filter`` attribute of
    the namespace.  The attribute is only created once the first such option
    is encountered.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # create the shared list lazily on first use
        if not hasattr(namespace, "filter"):
            namespace.filter = []
        namespace.filter.append((self.dest, values))
def main():
    """Filter a tarball from standard input onto standard output.

    Tar members are dropped or kept according to the --path-exclude and
    --path-include options, which are evaluated in the order they were given
    on the command line.  If no filter option is given, the input is copied
    to the output unchanged.
    """
    parser = argparse.ArgumentParser(description="filter a tarball")
    parser.add_argument(
        "--path-exclude",
        metavar="pattern",
        action=FilterAction,
        help="Exclude path matching the given shell pattern.",
    )
    parser.add_argument(
        "--path-include",
        metavar="pattern",
        action=FilterAction,
        help="Re-include a pattern after a previous exclusion.",
    )
    args = parser.parse_args()

    if not hasattr(args, "filter"):
        # FilterAction only creates the attribute once a filter option was
        # seen, so there is nothing to filter: pass the data through as-is
        from shutil import copyfileobj

        copyfileobj(sys.stdin.buffer, sys.stdout.buffer)
        return

    # The literal part of every include pattern up to its first wildcard
    # character, without trailing slashes. This only depends on the command
    # line, so it is computed once instead of once per tar member.
    include_prefixes = [
        re.sub(r"^([^*?[\\]*).*", r"\1", pattern).rstrip("/")
        for kind, pattern in args.filter
        if kind == "path_include"
    ]

    # same logic as in dpkg/src/filters.c/filter_should_skip()
    def filter_should_skip(member):
        # patterns are matched against the member name without its first
        # character (the leading "." of names like "./usr/share/doc")
        path = member.name[1:]
        skip = False
        # the last matching pattern wins
        for kind, pattern in args.filter:
            if fnmatch(path, pattern):
                skip = kind != "path_include"
        # An excluded directory or symlink is kept anyway if its path starts
        # with the literal prefix of any include pattern.
        if skip and (member.isdir() or member.issym()):
            if any(path.startswith(prefix) for prefix in include_prefixes):
                return False
        return skip

    # starting with Python 3.8, the default format became PAX_FORMAT, so this
    # is only for compatibility with older versions of Python 3
    with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
        fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT
    ) as out_tar:
        for member in in_tar:
            if filter_should_skip(member):
                continue
            if member.isfile():
                # regular files carry data that has to be copied along with
                # the header
                with in_tar.extractfile(member) as file:
                    out_tar.addfile(member, file)
            else:
                out_tar.addfile(member)


if __name__ == "__main__":
    main()