deb-shrink-packages/shrink.pl

340 lines
10 KiB
Perl
Executable file

#!/usr/bin/perl
# this program uses Dpkg::Index to randomly remove packages from the index
# until a minimal test case is found.
#
# Another way to solve this problem is:
#
# 09:02 < helmut> josch: a tool that has worked well for me for a number of
# problems was rolling an exponentially distributed number of
# lines from a random point. the lambda parameter was adapted
# with the success.
# 09:03 < helmut> josch: after finding a minimal set of lines, a
# language-specific tool can explode lines. for c that can be
# almost as simple as replacing space with newlines. then start
# over
# 09:04 < helmut> removing a sequence of lines has the benefit of removing
# whole functions/paragraphs/etc. and thus producing
# syntactically valid stuff occasionally
# 09:06 < helmut> I recommend that you spend a bit of time looking into haskell
# quickcheck's Arbitrary type class. it has a shrink member,
# which essentially does the same thing on haskell data
# structures
# 09:07 < helmut> drawing inspiration from there might help
use strict;
use warnings;
use Dpkg::Index;
use List::Util 'shuffle';
sub check {
my $wantedexit = shift;
print "running: ";
print join " ", @_;
print "\n";
system @_;
my $exit = $? >> 8;
if ($exit == $wantedexit) {
return 1;
} else {
print "got exit $exit but wanted exit $wantedexit\n";
return 0;
}
}
sub write_enabled_pkgs {
my $index = shift;
my $enabled = shift;
my $fname = shift;
print "writing to $fname\n";
# write out all the still enabled packages
open(my $fh, '>', $fname);
foreach my $k ($index->get_keys()) {
if (!$enabled->{$k}) {
next;
}
print { $fh } $index->get_by_key($k) . "\n";
}
close $fh;
}
sub randomize_pkgs {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
my $tmpname = shift;
my $wantedexit = shift;
my @checkcmd = @_;
# randomly delete 0.5% of the packages until we failed to produce the
# expected result as many times as 1% of the package are.
my $num_failures = 0;
while (1) {
# fill an array with the remaining enabled indices
my @indices = ();
my $numpkgs = scalar keys %{$enabled};
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
my @shuffled_indices = shuffle(@indices);
printf "1st phase: %d/%d\n", scalar @indices, $numpkgs;
my $tenpercent = (scalar @indices)*0.005;
my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ];
my $num_to_disable = scalar @to_disable;
print "disabling: $num_to_disable\n";
print "acc. failures: $num_failures\n";
for my $i (@to_disable) {
$enabled->{$id_to_key->{$i}} = 0;
}
write_enabled_pkgs($index, $enabled, $tmpname);
if (check($wantedexit, @checkcmd)) {
# the error we want to see is still there so we keep these keys
# disabled and reset the number of failures
$num_failures = 0;
} else {
# the error vanished
# re-enable the formerly disabled ones and increment the number of
# failures
for my $i (@to_disable) {
$enabled->{$id_to_key->{$i}} = 1;
}
$num_failures += 1;
}
# if we have unsuccessfully tried running the check as many times as
# the current percentage is removing at once, then it starts making
# sense to remove packages one-by-one
#
# In other words: It does not make sense to remove X packages at once
# if we are are running check() more than X times.
#
# We add the factor 2 because the threshold might be reached randomly
# as well.
if ($num_failures > $num_to_disable*2) {
last;
}
}
}
sub iterate_pkgs {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
my $tmpname = shift;
my $wantedexit = shift;
my @checkcmd = @_;
# we extract the array of enabled indices for better progress reporting
# later on
my $numpkgs = scalar keys %{$enabled};
my @indices = ();
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
# now delete single packages
# we need this stage to make sure to have looked at all packages at least
# once. Otherwise it can be that by chance a package was not randomly
# selected by the last stage.
my $i = 0;
foreach my $idx (@indices) {
my $key = $id_to_key->{$idx};
printf "2nd phase: %d/%d\n", $i, scalar @indices;
$i += 1;
# disable the current key
$enabled->{$key} = 0;
write_enabled_pkgs($index, $enabled, $tmpname);
if (check($wantedexit, @checkcmd)) {
# the error we want to see is still there so we keep this key disabled
} else {
# the error vanished
# re-enable the current key
$enabled->{$key} = 1;
}
}
}
sub delete_field {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
my $field = shift;
my $tmpname = shift;
my $wantedexit = shift;
my @checkcmd = @_;
# we extract the array of enabled indices for better progress reporting
# later on
my $numpkgs = scalar keys %{$enabled};
my @indices = ();
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
# now delete single packages
# we need this stage to make sure to have looked at all packages at least
# once. Otherwise it can be that by chance a package was not randomly
# selected by the last stage.
my $i = 0;
foreach my $idx (@indices) {
my $key = $id_to_key->{$idx};
printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices;
$i += 1;
# skip if this package doesn't have such a key
my $pkg = $index->get_by_key($key);
if (!exists $pkg->{$field}) {
next;
}
my $fieldvalue = $pkg->{$field};
# disable the current key
delete $pkg->{$field};
write_enabled_pkgs($index, $enabled, $tmpname);
if (check($wantedexit, @checkcmd)) {
# the error we want to see is still there so we keep this field deleted
} else {
# the error vanished
# restore the package
$index->get_by_key($key)->{$field} = $fieldvalue;
}
}
}
sub main {
my @srcbinlisttype = ();
# first argument is the desired exit code
my $wantedexit = shift @ARGV;
# the following argument are paths to Packages/Sources files
# to find out how many of those we expect, we scan the rest of ARGV for
# the placeholders
foreach my $arg (@ARGV) {
if ($arg eq '__src__') {
push @srcbinlisttype, "src";
}
if ($arg eq '__bin__') {
push @srcbinlisttype, "bin";
}
}
# now we now which parts are the actual input file names
my @srcbinlistcontent = splice @ARGV, 0, (scalar @srcbinlisttype);
# the rest is the command to execute
my @execcmd = @ARGV;
my $i = 0;
my @tmpfiles = ();
foreach my $val (@execcmd) {
if ($val eq '__src__') {
$val = "src.$i.tmp";
push @tmpfiles, $val;
$i += 1;
}
if ($val eq '__bin__') {
$val = "bin.$i.tmp";
push @tmpfiles, $val;
$i += 1;
}
}
my @data = ();
foreach( 0 .. $#srcbinlistcontent ) {
my $f = $srcbinlistcontent[$_];
my $t = $srcbinlisttype[$_];
my $r = $tmpfiles[$_];
print "reading in $f\n";
my $key_func = sub {
return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture};
};
my $index = Dpkg::Index->new(get_key_func=>$key_func);
$index->load($f);
my %allowed_fields = (
'Version' => 1,
'Package' => 1,
'Architecture' => 1,
);
if ($t eq "bin") {
$allowed_fields{'Depends'} = 1;
# 'Multi-Arch' => 1, # not needed with only one architecture
$allowed_fields{'Provides'} = 1;
$allowed_fields{'Breaks'} = 1;
$allowed_fields{'Conflicts'} = 1;
$allowed_fields{'Pre-Depends'} = 1;
$allowed_fields{'Source'} = 1;
$allowed_fields{'Essential'} = 1; # probably not needed
} else {
$allowed_fields{'Build-Depends'} = 1;
$allowed_fields{'Build-Depends-Arch'} = 1;
$allowed_fields{'Build-Depends-Indep'} = 1;
$allowed_fields{'Build-Conflicts'} = 1;
$allowed_fields{'Build-Conflicts-Arch'} = 1;
$allowed_fields{'Build-Conflicts-Indep'} = 1;
}
my %enabled = ();
my %id_to_key = ();
{
my $i = 0;
foreach my $key ($index->get_keys()) {
$enabled{$key} = 1;
$id_to_key{$i} = $key;
$i += 1;
# delete unnecessary fields
my $cdata = $index->get_by_key($key);
foreach my $field (keys %{$cdata}) {
if (! exists $allowed_fields{$field}) {
delete $cdata->{$field};
}
}
}
}
write_enabled_pkgs($index, \%enabled, $r);
push @data, [$r, $t, $index, \%enabled, \%id_to_key];
}
foreach my $v (@data) {
my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v};
randomize_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd);
}
foreach my $v (@data) {
my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v};
iterate_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd);
}
foreach my $v (@data) {
my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v};
if ($t eq "bin") {
delete_field($index, $enabled, $id_to_key, 'Depends', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Pre-Depends', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Conflicts', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Breaks', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Provides', $tmpname, $wantedexit, @execcmd);
} else {
delete_field($index, $enabled, $id_to_key, 'Build-Depends', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Build-Depends-Indep', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Build-Depends-Arch', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Build-Conflicts', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Build-Conflicts-Indep', $tmpname, $wantedexit, @execcmd);
delete_field($index, $enabled, $id_to_key, 'Build-Conflicts-Arch', $tmpname, $wantedexit, @execcmd);
}
}
# somehow, calling randomize_pkgs might remove build-essential
#randomize_pkgs($index, \%enabled, \%id_to_key);
foreach my $v (@data) {
my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v};
iterate_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd);
}
}
main();