initial commit
This commit is contained in:
commit
197995a019
1 changed files with 248 additions and 0 deletions
248
shrink.pl
Executable file
248
shrink.pl
Executable file
|
@ -0,0 +1,248 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# this program uses Dpkg::Index to randomly remove packages from the index
|
||||
# until a minimal test case is found.
|
||||
#
|
||||
# Another way to solve this problem is:
|
||||
#
|
||||
# 09:02 < helmut> josch: a tool that has worked well for me for a number of
|
||||
# problems was rolling an exponentially distributed number of
|
||||
# lines from a random point. the lambda parameter was adapted
|
||||
# with the success.
|
||||
# 09:03 < helmut> josch: after finding a minimal set of lines, a
|
||||
# language-specific tool can explode lines. for c that can be
|
||||
# almost as simple as replacing space with newlines. then start
|
||||
# over
|
||||
# 09:04 < helmut> removing a sequence of lines has the benefit of removing
|
||||
# whole functions/paragraphs/etc. and thus producing
|
||||
# syntactically valid stuff occasionally
|
||||
# 09:06 < helmut> I recommend that you spend a bit of time looking into haskell
|
||||
# quickcheck's Arbitrary type class. it has a shrink member,
|
||||
# which essentially does the same thing on haskell data
|
||||
# structures
|
||||
# 09:07 < helmut> drawing inspiration from there might help
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Dpkg::Index;
|
||||
use List::Util 'shuffle';
|
||||
|
||||
sub check {
|
||||
system 'dose-builddebcheck', '--failures', '--explain', '--quiet', '--deb-native-arch=kfreebsd-i386', 'tmp', 'wanna-build-interesting-sources-sid.kfreebsd-i386';
|
||||
my $exit = $? >> 8;
|
||||
if ($exit == 64) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub write_enabled_pkgs {
|
||||
my $index = shift;
|
||||
my $enabled = shift;
|
||||
# write out all the still enabled packages
|
||||
open(my $fh, '>', 'tmp');
|
||||
foreach my $k ($index->get_keys()) {
|
||||
if (!$enabled->{$k}) {
|
||||
next;
|
||||
}
|
||||
print { $fh } $index->get_by_key($k) . "\n";
|
||||
}
|
||||
close $fh;
|
||||
}
|
||||
|
||||
sub randomize_pkgs {
|
||||
my $index = shift;
|
||||
my $enabled = shift;
|
||||
my $id_to_key = shift;
|
||||
# randomly delete 0.5% of the packages until we failed to produce the
|
||||
# expected result 20 consecutive times
|
||||
my $num_failures = 0;
|
||||
while (1) {
|
||||
# fill an array with the remaining enabled indices
|
||||
my @indices = ();
|
||||
my $numpkgs = scalar keys %{$enabled};
|
||||
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||
if (!$enabled->{$id_to_key->{$i}}) {
|
||||
next;
|
||||
}
|
||||
push @indices, $i;
|
||||
}
|
||||
my @shuffled_indices = shuffle(@indices);
|
||||
printf "1st phase: %d/%d\n", scalar @indices, $numpkgs;
|
||||
my $tenpercent = (scalar @indices)*0.005;
|
||||
my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ];
|
||||
my $num_to_disable = scalar @to_disable;
|
||||
print "disabling: $num_to_disable\n";
|
||||
print "acc. failures: $num_failures\n";
|
||||
for my $i (@to_disable) {
|
||||
$enabled->{$id_to_key->{$i}} = 0;
|
||||
}
|
||||
write_enabled_pkgs($index, $enabled);
|
||||
if (check()) {
|
||||
# the error we want to see is still there so we keep these keys
|
||||
# disabled and reset the number of failures
|
||||
$num_failures = 0;
|
||||
} else {
|
||||
# the error vanished
|
||||
# re-enable the formerly disabled ones and increment the number of
|
||||
# failures
|
||||
for my $i (@to_disable) {
|
||||
$enabled->{$id_to_key->{$i}} = 1;
|
||||
}
|
||||
$num_failures += 1;
|
||||
}
|
||||
# if we have unsuccessfully tried running the check as many times as
|
||||
# the current percentage is removing at once, then it starts making
|
||||
# sense to remove packages one-by-one
|
||||
#
|
||||
# In other words: It does not make sense to remove X packages at once
|
||||
# if we are are running check() more than X times.
|
||||
#
|
||||
# We add the factor 2 because the threshold might be reached randomly
|
||||
# as well.
|
||||
if ($num_failures > $num_to_disable*2) {
|
||||
last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub iterate_pkgs {
|
||||
my $index = shift;
|
||||
my $enabled = shift;
|
||||
my $id_to_key = shift;
|
||||
# we extract the array of enabled indices for better progress reporting
|
||||
# later on
|
||||
my $numpkgs = scalar keys %{$enabled};
|
||||
my @indices = ();
|
||||
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||
if (!$enabled->{$id_to_key->{$i}}) {
|
||||
next;
|
||||
}
|
||||
push @indices, $i;
|
||||
}
|
||||
|
||||
# now delete single packages
|
||||
# we need this stage to make sure to have looked at all packages at least
|
||||
# once. Otherwise it can be that by chance a package was not randomly
|
||||
# selected by the last stage.
|
||||
my $i = 0;
|
||||
foreach my $idx (@indices) {
|
||||
my $key = $id_to_key->{$idx};
|
||||
printf "2nd phase: %d/%d\n", $i, scalar @indices;
|
||||
$i += 1;
|
||||
# disable the current key
|
||||
$enabled->{$key} = 0;
|
||||
write_enabled_pkgs($index, $enabled);
|
||||
if (check()) {
|
||||
# the error we want to see is still there so we keep this key disabled
|
||||
} else {
|
||||
# the error vanished
|
||||
# re-enable the current key
|
||||
$enabled->{$key} = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub delete_field {
|
||||
my $index = shift;
|
||||
my $enabled = shift;
|
||||
my $id_to_key = shift;
|
||||
my $field = shift;
|
||||
# we extract the array of enabled indices for better progress reporting
|
||||
# later on
|
||||
my $numpkgs = scalar keys %{$enabled};
|
||||
my @indices = ();
|
||||
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||
if (!$enabled->{$id_to_key->{$i}}) {
|
||||
next;
|
||||
}
|
||||
push @indices, $i;
|
||||
}
|
||||
|
||||
# now delete single packages
|
||||
# we need this stage to make sure to have looked at all packages at least
|
||||
# once. Otherwise it can be that by chance a package was not randomly
|
||||
# selected by the last stage.
|
||||
my $i = 0;
|
||||
foreach my $idx (@indices) {
|
||||
my $key = $id_to_key->{$idx};
|
||||
printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices;
|
||||
$i += 1;
|
||||
# skip if this package doesn't have such a key
|
||||
my $pkg = $index->get_by_key($key);
|
||||
if (!exists $pkg->{$field}) {
|
||||
next;
|
||||
}
|
||||
my $fieldvalue = $pkg->{$field};
|
||||
# disable the current key
|
||||
delete $pkg->{$field};
|
||||
write_enabled_pkgs($index, $enabled);
|
||||
if (check()) {
|
||||
# the error we want to see is still there so we keep this field deleted
|
||||
} else {
|
||||
# the error vanished
|
||||
# restore the package
|
||||
$index->get_by_key($key)->{$field} = $fieldvalue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub main {
|
||||
my $desc = $ARGV[0];
|
||||
if (not defined($desc)) {
|
||||
die "need a Packages file";
|
||||
}
|
||||
|
||||
my $key_func = sub {
|
||||
return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture};
|
||||
};
|
||||
|
||||
my $index = Dpkg::Index->new(get_key_func=>$key_func);
|
||||
|
||||
$index->load($desc);
|
||||
my %allowed_fields = (
|
||||
'Version' => 1,
|
||||
'Package' => 1,
|
||||
'Architecture' => 1,
|
||||
'Depends' => 1,
|
||||
# 'Multi-Arch' => 1, # not needed with only one architecture
|
||||
'Provides' => 1,
|
||||
'Breaks' => 1,
|
||||
'Conflicts' => 1,
|
||||
'Pre-Depends' => 1,
|
||||
# 'Essential' => 1, # probably not needed
|
||||
);
|
||||
|
||||
my %enabled = ();
|
||||
my %id_to_key = ();
|
||||
{
|
||||
my $i = 0;
|
||||
foreach my $key ($index->get_keys()) {
|
||||
$enabled{$key} = 1;
|
||||
$id_to_key{$i} = $key;
|
||||
$i += 1;
|
||||
# delete unnecessary fields
|
||||
my $cdata = $index->get_by_key($key);
|
||||
foreach my $field (keys %{$cdata}) {
|
||||
if (! exists $allowed_fields{$field}) {
|
||||
delete $cdata->{$field};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
randomize_pkgs($index, \%enabled, \%id_to_key);
|
||||
iterate_pkgs($index, \%enabled, \%id_to_key);
|
||||
delete_field($index, \%enabled, \%id_to_key, 'Depends');
|
||||
delete_field($index, \%enabled, \%id_to_key, 'Pre-Depends');
|
||||
delete_field($index, \%enabled, \%id_to_key, 'Conflicts');
|
||||
delete_field($index, \%enabled, \%id_to_key, 'Breaks');
|
||||
delete_field($index, \%enabled, \%id_to_key, 'Provides');
|
||||
# somehow, calling randomize_pkgs might remove build-essential
|
||||
#randomize_pkgs($index, \%enabled, \%id_to_key);
|
||||
iterate_pkgs($index, \%enabled, \%id_to_key);
|
||||
}
|
||||
|
||||
main();
|
Loading…
Reference in a new issue