#!/usr/bin/perl # this program uses Dpkg::Index to randomly remove packages from the index # until a minimal test case is found. # # Another way to solve this problem is: # # 09:02 < helmut> josch: a tool that has worked well for me for a number of # problems was rolling an exponentially distributed number of # lines from a random point. the lambda parameter was adapted # with the success. # 09:03 < helmut> josch: after finding a minimal set of lines, a # language-specific tool can explode lines. for c that can be # almost as simple as replacing space with newlines. then start # over # 09:04 < helmut> removing a sequence of lines has the benefit of removing # whole functions/paragraphs/etc. and thus producing # syntactically valid stuff occasionally # 09:06 < helmut> I recommend that you spend a bit of time looking into haskell # quickcheck's Arbitrary type class. it has a shrink member, # which essentially does the same thing on haskell data # structures # 09:07 < helmut> drawing inspiration from there might help use strict; use warnings; use Dpkg::Index; use List::Util 'shuffle'; sub check { my $wantedexit = shift; print "running: "; print join " ", @_; print "\n"; system @_; my $exit = $? >> 8; if ($exit == $wantedexit) { return 1; } else { print "got exit $exit but wanted exit $wantedexit\n"; return 0; } } sub write_enabled_pkgs { my $index = shift; my $enabled = shift; my $fname = shift; print "writing to $fname\n"; # write out all the still enabled packages open(my $fh, '>', $fname); foreach my $k ($index->get_keys()) { if (!$enabled->{$k}) { next; } print { $fh } $index->get_by_key($k) . "\n"; } close $fh; } sub randomize_pkgs { my $index = shift; my $enabled = shift; my $id_to_key = shift; my $tmpname = shift; my $wantedexit = shift; my @checkcmd = @_; # randomly delete 0.5% of the packages until we failed to produce the # expected result as many times as 1% of the package are. my $num_failures = 0; while (1) { # fill an array with the remaining enabled indices my @indices = (); my $numpkgs = scalar keys %{$enabled}; for (my $i = 0; $i < $numpkgs; $i++) { if (!$enabled->{$id_to_key->{$i}}) { next; } push @indices, $i; } my @shuffled_indices = shuffle(@indices); printf "1st phase: %d/%d\n", scalar @indices, $numpkgs; my $tenpercent = (scalar @indices)*0.005; my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ]; my $num_to_disable = scalar @to_disable; print "disabling: $num_to_disable\n"; print "acc. failures: $num_failures\n"; for my $i (@to_disable) { $enabled->{$id_to_key->{$i}} = 0; } write_enabled_pkgs($index, $enabled, $tmpname); if (check($wantedexit, @checkcmd)) { # the error we want to see is still there so we keep these keys # disabled and reset the number of failures $num_failures = 0; } else { # the error vanished # re-enable the formerly disabled ones and increment the number of # failures for my $i (@to_disable) { $enabled->{$id_to_key->{$i}} = 1; } $num_failures += 1; } # if we have unsuccessfully tried running the check as many times as # the current percentage is removing at once, then it starts making # sense to remove packages one-by-one # # In other words: It does not make sense to remove X packages at once # if we are are running check() more than X times. # # We add the factor 2 because the threshold might be reached randomly # as well. if ($num_failures > $num_to_disable*2) { last; } } } sub iterate_pkgs { my $index = shift; my $enabled = shift; my $id_to_key = shift; my $tmpname = shift; my $wantedexit = shift; my @checkcmd = @_; # we extract the array of enabled indices for better progress reporting # later on my $numpkgs = scalar keys %{$enabled}; my @indices = (); for (my $i = 0; $i < $numpkgs; $i++) { if (!$enabled->{$id_to_key->{$i}}) { next; } push @indices, $i; } # now delete single packages # we need this stage to make sure to have looked at all packages at least # once. Otherwise it can be that by chance a package was not randomly # selected by the last stage. my $i = 0; foreach my $idx (@indices) { my $key = $id_to_key->{$idx}; printf "2nd phase: %d/%d\n", $i, scalar @indices; $i += 1; # disable the current key $enabled->{$key} = 0; write_enabled_pkgs($index, $enabled, $tmpname); if (check($wantedexit, @checkcmd)) { # the error we want to see is still there so we keep this key disabled } else { # the error vanished # re-enable the current key $enabled->{$key} = 1; } } } sub delete_field { my $index = shift; my $enabled = shift; my $id_to_key = shift; my $field = shift; my $tmpname = shift; my $wantedexit = shift; my @checkcmd = @_; # we extract the array of enabled indices for better progress reporting # later on my $numpkgs = scalar keys %{$enabled}; my @indices = (); for (my $i = 0; $i < $numpkgs; $i++) { if (!$enabled->{$id_to_key->{$i}}) { next; } push @indices, $i; } # now delete single packages # we need this stage to make sure to have looked at all packages at least # once. Otherwise it can be that by chance a package was not randomly # selected by the last stage. my $i = 0; foreach my $idx (@indices) { my $key = $id_to_key->{$idx}; printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices; $i += 1; # skip if this package doesn't have such a key my $pkg = $index->get_by_key($key); if (!exists $pkg->{$field}) { next; } my $fieldvalue = $pkg->{$field}; # disable the current key delete $pkg->{$field}; write_enabled_pkgs($index, $enabled, $tmpname); if (check($wantedexit, @checkcmd)) { # the error we want to see is still there so we keep this field deleted } else { # the error vanished # restore the package $index->get_by_key($key)->{$field} = $fieldvalue; } } } sub main { my @srcbinlisttype = (); # first argument is the desired exit code my $wantedexit = shift @ARGV; # the following argument are paths to Packages/Sources files # to find out how many of those we expect, we scan the rest of ARGV for # the placeholders foreach my $arg (@ARGV) { if ($arg eq '__src__') { push @srcbinlisttype, "src"; } if ($arg eq '__bin__') { push @srcbinlisttype, "bin"; } } # now we now which parts are the actual input file names my @srcbinlistcontent = splice @ARGV, 0, (scalar @srcbinlisttype); # the rest is the command to execute my @execcmd = @ARGV; my $i = 0; my @tmpfiles = (); foreach my $val (@execcmd) { if ($val eq '__src__') { $val = "src.$i.tmp"; push @tmpfiles, $val; $i += 1; } if ($val eq '__bin__') { $val = "bin.$i.tmp"; push @tmpfiles, $val; $i += 1; } } my @data = (); foreach( 0 .. $#srcbinlistcontent ) { my $f = $srcbinlistcontent[$_]; my $t = $srcbinlisttype[$_]; my $r = $tmpfiles[$_]; print "reading in $f\n"; my $key_func = sub { return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture}; }; my $index = Dpkg::Index->new(get_key_func=>$key_func); $index->load($f); my %allowed_fields = ( 'Version' => 1, 'Package' => 1, 'Architecture' => 1, ); if ($t eq "bin") { $allowed_fields{'Depends'} = 1; # 'Multi-Arch' => 1, # not needed with only one architecture $allowed_fields{'Provides'} = 1; $allowed_fields{'Breaks'} = 1; $allowed_fields{'Conflicts'} = 1; $allowed_fields{'Pre-Depends'} = 1; $allowed_fields{'Source'} = 1; $allowed_fields{'Essential'} = 1; # probably not needed } else { $allowed_fields{'Build-Depends'} = 1; $allowed_fields{'Build-Depends-Arch'} = 1; $allowed_fields{'Build-Depends-Indep'} = 1; $allowed_fields{'Build-Conflicts'} = 1; $allowed_fields{'Build-Conflicts-Arch'} = 1; $allowed_fields{'Build-Conflicts-Indep'} = 1; } my %enabled = (); my %id_to_key = (); { my $i = 0; foreach my $key ($index->get_keys()) { $enabled{$key} = 1; $id_to_key{$i} = $key; $i += 1; # delete unnecessary fields my $cdata = $index->get_by_key($key); foreach my $field (keys %{$cdata}) { if (! exists $allowed_fields{$field}) { delete $cdata->{$field}; } } } } write_enabled_pkgs($index, \%enabled, $r); push @data, [$r, $t, $index, \%enabled, \%id_to_key]; } foreach my $v (@data) { my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v}; randomize_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd); } foreach my $v (@data) { my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v}; iterate_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd); } foreach my $v (@data) { my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v}; if ($t eq "bin") { delete_field($index, $enabled, $id_to_key, 'Depends', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Pre-Depends', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Conflicts', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Breaks', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Provides', $tmpname, $wantedexit, @execcmd); } else { delete_field($index, $enabled, $id_to_key, 'Build-Depends', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Build-Depends-Indep', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Build-Depends-Arch', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Build-Conflicts', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Build-Conflicts-Indep', $tmpname, $wantedexit, @execcmd); delete_field($index, $enabled, $id_to_key, 'Build-Conflicts-Arch', $tmpname, $wantedexit, @execcmd); } } # somehow, calling randomize_pkgs might remove build-essential #randomize_pkgs($index, \%enabled, \%id_to_key); foreach my $v (@data) { my ($tmpname, $t, $index, $enabled, $id_to_key) = @{$v}; iterate_pkgs($index, $enabled, $id_to_key, $tmpname, $wantedexit, @execcmd); } } main();