From cc3150ef04c24b6837c04eed6b89ee15139c4377 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Tue, 24 May 2022 11:47:16 +0200 Subject: [PATCH] Rework download stage to allow file:// mirrors - factor out package downloading function - replace -oApt::Get::Download-Only=true by -oDebug::pkgDpkgPm=1 - remove guessing of package names in /var/cache/apt/archives/ - drop edsp parsing with proxysolver/mmdebstrap-dump-solution to obtain downloaded filenames in favour of -oDpkg::Pre-Install-Pkgs::=cat - /var/cache/apt/archives/ is now allowed to contain packages - drop --skip=download/empty - file:// mirrors are now supported if their path is available inside the chroot --- coverage.sh | 14 +-- mmdebstrap | 336 +++++++++++++++++++--------------------------------- 2 files changed, 126 insertions(+), 224 deletions(-) diff --git a/coverage.sh b/coverage.sh index b8afbf6..1eb87e4 100755 --- a/coverage.sh +++ b/coverage.sh @@ -1535,7 +1535,7 @@ else skipped=$((skipped+1)) fi -print_header "mode=$defaultmode,variant=apt: fail with file:// mirror" +print_header "mode=$defaultmode,variant=apt: file:// mirror" cat << END > shared/test.sh #!/bin/sh set -eu @@ -1544,13 +1544,9 @@ if [ ! -e /mmdebstrap-testenv ]; then echo "this test requires the cache directory to be mounted on /mnt and should only be run inside a container" >&2 exit 1 fi -ret=0 -$CMD --mode=$defaultmode --variant=apt $DEFAULT_DIST /tmp/debian-chroot.tar "deb file:///mnt/cache/debian unstable main" || ret=\$? +$CMD --mode=$defaultmode --variant=apt --setup-hook='mkdir -p "\$1"/mnt/cache/debian; mount -o ro,bind /mnt/cache/debian "\$1"/mnt/cache/debian' --customize-hook='umount "\$1"/mnt/cache/debian; rmdir "\$1"/mnt/cache/debian "\$1"/mnt/cache' $DEFAULT_DIST /tmp/debian-chroot.tar "deb file:///mnt/cache/debian $DEFAULT_DIST main" +tar -tf /tmp/debian-chroot.tar | sort | diff -u tar1.txt - rm /tmp/debian-chroot.tar -if [ "\$ret" = 0 ]; then - echo expected failure but got exit \$ret >&2 - exit 1 -fi END if [ "$HAVE_QEMU" = "yes" ]; then ./run_qemu.sh @@ -3083,7 +3079,7 @@ $CMD \$include --mode=$defaultmode --variant=$variant \ --setup-hook='sync-in "'"\$tmpdir"'" /var/cache/apt/archives/partial' \ $DEFAULT_DIST - $mirror > test1.tar cmp orig.tar test1.tar -$CMD \$include --mode=$defaultmode --variant=$variant --skip=download/empty \ +$CMD \$include --mode=$defaultmode --variant=$variant \ --customize-hook='touch "\$1"/var/cache/apt/archives/partial' \ --setup-hook='mkdir -p "\$1"/var/cache/apt/archives/' \ --setup-hook='sync-in "'"\$tmpdir"'" /var/cache/apt/archives/' \ @@ -3250,8 +3246,6 @@ rm /tmp/debian-chroot/etc/hostname rm /tmp/debian-chroot/etc/resolv.conf rm /tmp/debian-chroot/var/lib/dpkg/status rm /tmp/debian-chroot/var/cache/apt/archives/lock -rm /tmp/debian-chroot/var/lib/dpkg/lock -rm /tmp/debian-chroot/var/lib/dpkg/lock-frontend rm /tmp/debian-chroot/var/lib/apt/lists/lock ## delete merged usr symlinks #rm /tmp/debian-chroot/libx32 diff --git a/mmdebstrap b/mmdebstrap index 3f73d2b..d0d9ddb 100755 --- a/mmdebstrap +++ b/mmdebstrap @@ -872,30 +872,11 @@ sub run_dpkg_progress { } sub run_apt_progress { - my $options = shift; - my @debs = @{ $options->{PKGS} // [] }; - my $tmpedsp; - if (exists $options->{EDSP_RES}) { - (undef, $tmpedsp) = tempfile( - "mmdebstrap.edsp.XXXXXXXXXXXX", - OPEN => 0, - TMPDIR => 1 - ); - } + my $options = shift; + my @debs = @{ $options->{PKGS} // [] }; my $get_exec = sub { my @prefix = (); my @opts = (); - if (exists $options->{EDSP_RES}) { - push @prefix, 'env', 
"APT_EDSP_DUMP_FILENAME=$tmpedsp"; - if (-e "./proxysolver") { - # for development purposes, use the current directory if it - # contains a file called proxysolver - push @opts, ("-oDir::Bin::solvers=" . getcwd()), - '--solver=proxysolver'; - } else { - push @opts, '--solver=mmdebstrap-dump-solution'; - } - } return ( @prefix, @{ $options->{ARGV} }, @@ -950,38 +931,79 @@ sub run_apt_progress { } }; run_progress $get_exec, $line_handler, $line_has_error, $options->{CHDIR}; - if (exists $options->{EDSP_RES}) { - info "parsing EDSP results..."; - open my $fh, '<', $tmpedsp - or error "failed to open $tmpedsp for reading: $!"; - my $inst = 0; - my $pkg; - my $ver; - while (my $line = <$fh>) { - chomp $line; - if ($line ne "") { - if ($line =~ /^Install: \d+/) { - $inst = 1; - } elsif ($line =~ /^Package: (.*)/) { - $pkg = $1; - } elsif ($line =~ /^Version: (.*)/) { - $ver = $1; - } - next; - } - if ($inst == 1 && defined $pkg && defined $ver) { - push @{ $options->{EDSP_RES} }, [$pkg, $ver]; - } - $inst = 0; - undef $pkg; - undef $ver; - } - close $fh; - unlink $tmpedsp; - } return; } +sub run_apt_download_progress { + my $options = shift; + if ($options->{dryrun}) { + info "simulate downloading packages with apt..."; + } else { + info "downloading packages with apt..."; + } + pipe my $rfh, my $wfh; + my $pid = open my $fh, '-|' // error "fork() failed: $!"; + if ($pid == 0) { + close $wfh; + # read until parent process closes $wfh + my $content = do { local $/; <$rfh> }; + close $rfh; + # the parent is done -- pass what we read back to it + print $content; + exit 0; + } + close $rfh; + # Unset the close-on-exec flag, so that the file descriptor does not + # get closed when we exec + my $flags = fcntl($wfh, F_GETFD, 0) or error "fcntl F_GETFD: $!"; + fcntl($wfh, F_SETFD, $flags & ~FD_CLOEXEC) or error "fcntl F_SETFD: $!"; + my $fd = fileno $wfh; + # 2022-05-02, #debian-apt on OFTC, times in UTC+2 + # 16:57 < josch> DonKult: how is -oDebug::pkgDpkgPm=1 -oDir::Log=/dev/null + # a "fancy no-op"? + # 11:52 < DonKult> josch: "fancy no-op" in sofar as it does nothing to the + # system even through its not in a special mode ala + # simulation or download-only. It does all the things it + # normally does, except that it just prints the dpkg calls + # instead of execv() them which in practice amounts means + # it does nothing (the Dir::Log just prevents libapt from + # creating the /var/log/apt directories. As the code + # creates them even if no logs will be placed there…). As + # said, midterm an apt --print-install-packages or + # something would be nice to avoid running everything. + run_apt_progress({ + ARGV => [ + 'apt-get', + '--yes', + '-oDebug::pkgDpkgPm=1', + '-oDir::Log=/dev/null', + $options->{dryrun} + ? '-oAPT::Get::Simulate=true' + : ( + "-oAPT::Keep-Fds::=$fd", + "-oDPkg::Tools::options::'cat >&$fd'::InfoFD=$fd", + "-oDpkg::Pre-Install-Pkgs::=cat >&$fd", + # no need to lock the database if we are just downloading + "-oDebug::NoLocking=1", + # no need for pty magic if we write no log + "-oDpkg::Use-Pty=0", + ), + @{ $options->{APT_ARGV} }, + ], + }); + # signal the child process that we are done + close $wfh; + # and then read from it what it got + my @listofdebs = <$fh>; + close $fh; + if ($? 
!= 0) { + error "status child failed"; + } + # remove trailing newlines + chomp @listofdebs; + return @listofdebs; +} + sub run_chroot { my $cmd = shift; my $options = shift; @@ -2037,24 +2059,12 @@ sub run_update() { sub run_download() { my $options = shift; - # We use /var/cache/apt/archives/ to figure out which packages apt chooses - # to install. That's why the directory must be empty if: - # - /var/cache/apt/archives exists, and - # - no simulation run is done, and - # - the variant is not extract or custom or the number to be - # installed packages not zero - # - # We could also unconditionally use the proxysolver and then "apt-get - # download" any missing packages but using the proxysolver requires - # /usr/lib/apt/solvers/apt from the apt-utils package and we want to avoid - # that dependency. - # # In the future we want to replace downloading packages with "apt-get - # install --download-only" and installing them with dpkg by just installing - # the essential packages with apt from the outside with - # DPkg::Chroot-Directory. We are not doing that because then the preinst - # script of base-passwd will not be called early enough and packages will - # fail to install because they are missing /etc/passwd. + # install" and installing them with dpkg by just installing the essential + # packages with apt from the outside with DPkg::Chroot-Directory. + # We are not doing that because then the preinst script of base-passwd will + # not be called early enough and packages will fail to install because they + # are missing /etc/passwd. my @cached_debs = (); my @dl_debs = (); if ( @@ -2076,14 +2086,6 @@ sub run_download() { push @cached_debs, $deb; } closedir $dh; - if (scalar @cached_debs > 0) { - if (any { $_ eq 'download/empty' } @{ $options->{skip} }) { - info "skipping download/empty as requested"; - } else { - error("/var/cache/apt/archives/ inside the chroot contains: " - . (join ', ', (sort @cached_debs))); - } - } } # To figure out the right package set for the apt variant we can use: @@ -2097,7 +2099,7 @@ sub run_download() { info "nothing to download -- skipping..."; return ([], \@cached_debs); } - my %pkgs_to_install; + my @apt_argv = ('install'); for my $incl (@{ $options->{include} }) { for my $pkg (split /[,\s]+/, $incl) { # strip leading and trailing whitespace @@ -2106,32 +2108,15 @@ sub run_download() { if ($pkg eq '') { next; } - $pkgs_to_install{$pkg} = (); + push @apt_argv, $pkg; } } - my %result = (); - if ($options->{dryrun}) { - info "simulate downloading packages with apt..."; - } else { - # if there are already packages in /var/cache/apt/archives/, we - # need to use our proxysolver to obtain the solution chosen by apt - if (scalar @cached_debs > 0) { - $result{EDSP_RES} = \@dl_debs; - } - info "downloading packages with apt..."; - } - run_apt_progress({ - ARGV => [ - 'apt-get', - '--yes', - '-oApt::Get::Download-Only=true', - $options->{dryrun} ? '-oAPT::Get::Simulate=true' : (), - 'install' - ], - PKGS => [keys %pkgs_to_install], - %result - }); + @dl_debs = run_apt_download_progress({ + APT_ARGV => [@apt_argv], + dryrun => $options->{dryrun}, + }, + ); } elsif ($options->{variant} eq 'apt') { # if we just want to install Essential:yes packages, apt and their # dependencies then we can make use of libapt treating apt as @@ -2146,27 +2131,11 @@ sub run_download() { # remind me in 5+ years that I said that after I wrote # in the bugreport: "Are you crazy?!? 
Nobody in his # right mind would even suggest depending on it!") - my %result = (); - if ($options->{dryrun}) { - info "simulate downloading packages with apt..."; - } else { - # if there are already packages in /var/cache/apt/archives/, we - # need to use our proxysolver to obtain the solution chosen by apt - if (scalar @cached_debs > 0) { - $result{EDSP_RES} = \@dl_debs; - } - info "downloading packages with apt..."; - } - run_apt_progress({ - ARGV => [ - 'apt-get', - '--yes', - '-oApt::Get::Download-Only=true', - $options->{dryrun} ? '-oAPT::Get::Simulate=true' : (), - 'dist-upgrade' - ], - %result - }); + @dl_debs = run_apt_download_progress({ + APT_ARGV => ['dist-upgrade'], + dryrun => $options->{dryrun}, + }, + ); } elsif ( any { $_ eq $options->{variant} } ('essential', 'standard', 'important', 'required', 'buildd') @@ -2175,23 +2144,8 @@ sub run_download() { # 17:27 < DonKult> (?essential includes 'apt' through) # 17:30 < josch> DonKult: no, because pkgCacheGen::ForceEssential ","; # 17:32 < DonKult> touché - my %result = (); - if ($options->{dryrun}) { - info "simulate downloading packages with apt..."; - } else { - # if there are already packages in /var/cache/apt/archives/, we - # need to use our proxysolver to obtain the solution chosen by apt - if (scalar @cached_debs > 0) { - $result{EDSP_RES} = \@dl_debs; - } - info "downloading packages with apt..."; - } - run_apt_progress({ - ARGV => [ - 'apt-get', - '--yes', - '-oApt::Get::Download-Only=true', - $options->{dryrun} ? '-oAPT::Get::Simulate=true' : (), + @dl_debs = run_apt_download_progress({ + APT_ARGV => [ 'install', '?narrow(' . ( @@ -2206,76 +2160,34 @@ sub run_download() { . $options->{nativearch} . '),?essential)' ], - %result - }); + dryrun => $options->{dryrun}, + }, + ); } else { error "unknown variant: $options->{variant}"; } my @essential_pkgs; - if (scalar @cached_debs > 0 && scalar @dl_debs > 0) { - my $archives = "/var/cache/apt/archives/"; - # for each package in @dl_debs, check if it's in - # /var/cache/apt/archives/ and add it to @essential_pkgs - foreach my $p (@dl_debs) { - my ($pkg, $ver_epoch) = @{$p}; - # apt appends the architecture at the end of the package name - ($pkg, my $arch) = split ':', $pkg, 2; - # apt replaces the colon by its percent encoding %3a - my $ver = $ver_epoch; - $ver =~ s/:/%3a/; - # the architecture returned by apt is the native architecture. - # Since we don't know whether the package is architecture - # independent or not, we first try with the native arch and then - # with "all" and only error out if neither exists. - if (-e "$options->{root}/$archives/${pkg}_${ver}_$arch.deb") { - push @essential_pkgs, "$archives/${pkg}_${ver}_$arch.deb"; - } elsif (-e "$options->{root}/$archives/${pkg}_${ver}_all.deb") { - push @essential_pkgs, "$archives/${pkg}_${ver}_all.deb"; - } else { - error( "cannot find package for $pkg:$arch (= $ver_epoch) " - . "in /var/cache/apt/archives/"); + # strip the chroot directory from the filenames + foreach my $deb (@dl_debs) { + # if filename does not start with chroot directory then the user + # might've used a file:// mirror and we check whether the path is + # accessible inside the chroot + if (rindex $deb, $options->{root}, 0) { + if (!-e "$options->{root}/$deb") { + error "package file $deb not accessible from chroot directory" + . 
" -- use copy:// instead of file:// or a bind-mount"; } + push @essential_pkgs, $deb; + next; } - } else { - # collect the .deb files that were downloaded by apt from the content - # of /var/cache/apt/archives/ - if (!$options->{dryrun}) { - my $apt_archives = "/var/cache/apt/archives/"; - opendir my $dh, "$options->{root}/$apt_archives" - or error "cannot read $apt_archives"; - while (my $deb = readdir $dh) { - if ($deb !~ /\.deb$/) { - next; - } - $deb = "$apt_archives/$deb"; - if (!-f "$options->{root}/$deb") { - next; - } - push @essential_pkgs, $deb; - } - closedir $dh; - - if (scalar @essential_pkgs == 0) { - # check if a file:// URI was used - open(my $pipe_apt, '-|', 'apt-get', 'indextargets', '--format', - '$(URI)', 'Created-By: Packages') - or error "cannot start apt-get indextargets: $!"; - while (my $uri = <$pipe_apt>) { - if ($uri =~ /^file:\/\//) { - error - "nothing got downloaded -- use copy:// instead of" - . " file://"; - } - } - error "nothing got downloaded"; - } + # filename starts with chroot directory, strip it off + # this is the normal case + if (!-e $deb) { + error "cannot find package file $deb"; } + push @essential_pkgs, substr($deb, length($options->{root})); } - # Unpack order matters. Since we create this list using two different - # methods but we want both methods to have the same result, we sort the - # list before returning it. - @essential_pkgs = sort @essential_pkgs; return (\@essential_pkgs, \@cached_debs); } @@ -2732,6 +2644,10 @@ sub run_essential() { # before the download phase next if any { "/var/cache/apt/archives/$_" eq $deb } @{$cached_debs}; + # do not unlink those packages that were not in + # /var/cache/apt/archive (for example because they were provided by + # a file:// mirror) + next if $deb !~ /\/var\/cache\/apt\/archives\//; unlink "$options->{root}/$deb" or error "cannot unlink $deb: $!"; } @@ -6721,15 +6637,13 @@ the B step. This can be disabled using B<--skip=update>. =item B -Checks whether F is empty. This can be disabled with -B<--skip=download/empty>. In the B and B variants, C is used to download all the packages requested via the -B<--include> option. The B variant uses the fact that libapt treats the -C packages as implicitly essential to download only all C -packages plus apt using C. In the -remaining variants, all Packages files downloaded by the B step are -inspected to find the C package set as well as all packages of -the required priority. +In the B and B variants, C is used to +download all the packages requested via the B<--include> option. The B +variant uses the fact that libapt treats the C packages as implicitly +essential to download only all C packages plus apt using +C. In the remaining variants, all Packages files +downloaded by the B step are inspected to find the C +package set as well as all packages of the required priority. =item B @@ -6979,7 +6893,7 @@ apt-cacher-ng, you can use the B and B special hooks to synchronize a directory outside the chroot with F inside the chroot. - $ mmdebstrap --variant=apt --skip=download/empty --skip=essential/unlink \ + $ mmdebstrap --variant=apt --skip=essential/unlink \ --setup-hook='mkdir -p ./cache "$1"/var/cache/apt/archives/' \ --setup-hook='sync-in ./cache /var/cache/apt/archives/' \ --customize-hook='sync-out /var/cache/apt/archives ./cache' \ @@ -7149,12 +7063,6 @@ as the non-root user, then as a workaround you could run C so that the config files are only accessible by the root user. See Debian bug #808203. 
-The C<file://> URI type cannot be used to install the essential packages. This
-is because B<mmdebstrap> uses dpkg to install the packages that apt places into
-F</var/cache/apt/archives> but with C<file://> apt will not copy the files even
-with C<--download-only>. Use C<copy://> instead, which is equivalent to
-C<file://> but copies the archives into F</var/cache/apt/archives>.
-
 With apt versions before 2.1.16, setting C<[trusted=yes]> or
 C<Acquire::AllowInsecureRepositories "true"> to allow signed archives without
 a known public key or unsigned archives will fail because of a gpg warning in
 the
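
With this rework a file:// mirror works as long as the referenced path is also
visible inside the chroot for the duration of the build, which is what the
updated coverage.sh test does with a read-only bind mount. A minimal sketch of
the same idea outside the test suite, assuming a local mirror at
/srv/mirror/debian (the path, suite and output filename are placeholders, not
part of this patch):

    $ mmdebstrap --variant=apt \
        --setup-hook='mkdir -p "$1"/srv/mirror/debian; mount -o ro,bind /srv/mirror/debian "$1"/srv/mirror/debian' \
        --customize-hook='umount "$1"/srv/mirror/debian; rmdir "$1"/srv/mirror/debian "$1"/srv/mirror' \
        unstable debian-chroot.tar "deb file:///srv/mirror/debian unstable main"

If the path is not reachable inside the chroot, the download stage now fails
with a hint to use copy:// or a bind-mount instead, as implemented in the new
filename check above.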