initial commit
This commit is contained in:
commit
197995a019
1 changed files with 248 additions and 0 deletions
248
shrink.pl
Executable file
248
shrink.pl
Executable file
|
@ -0,0 +1,248 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# this program uses Dpkg::Index to randomly remove packages from the index
|
||||||
|
# until a minimal test case is found.
|
||||||
|
#
|
||||||
|
# Another way to solve this problem is:
|
||||||
|
#
|
||||||
|
# 09:02 < helmut> josch: a tool that has worked well for me for a number of
|
||||||
|
# problems was rolling an exponentially distributed number of
|
||||||
|
# lines from a random point. the lambda parameter was adapted
|
||||||
|
# with the success.
|
||||||
|
# 09:03 < helmut> josch: after finding a minimal set of lines, a
|
||||||
|
# language-specific tool can explode lines. for c that can be
|
||||||
|
# almost as simple as replacing space with newlines. then start
|
||||||
|
# over
|
||||||
|
# 09:04 < helmut> removing a sequence of lines has the benefit of removing
|
||||||
|
# whole functions/paragraphs/etc. and thus producing
|
||||||
|
# syntactically valid stuff occasionally
|
||||||
|
# 09:06 < helmut> I recommend that you spend a bit of time looking into haskell
|
||||||
|
# quickcheck's Arbitrary type class. it has a shrink member,
|
||||||
|
# which essentially does the same thing on haskell data
|
||||||
|
# structures
|
||||||
|
# 09:07 < helmut> drawing inspiration from there might help
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use Dpkg::Index;
|
||||||
|
use List::Util 'shuffle';
|
||||||
|
|
||||||
|
sub check {
|
||||||
|
system 'dose-builddebcheck', '--failures', '--explain', '--quiet', '--deb-native-arch=kfreebsd-i386', 'tmp', 'wanna-build-interesting-sources-sid.kfreebsd-i386';
|
||||||
|
my $exit = $? >> 8;
|
||||||
|
if ($exit == 64) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub write_enabled_pkgs {
|
||||||
|
my $index = shift;
|
||||||
|
my $enabled = shift;
|
||||||
|
# write out all the still enabled packages
|
||||||
|
open(my $fh, '>', 'tmp');
|
||||||
|
foreach my $k ($index->get_keys()) {
|
||||||
|
if (!$enabled->{$k}) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
print { $fh } $index->get_by_key($k) . "\n";
|
||||||
|
}
|
||||||
|
close $fh;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub randomize_pkgs {
|
||||||
|
my $index = shift;
|
||||||
|
my $enabled = shift;
|
||||||
|
my $id_to_key = shift;
|
||||||
|
# randomly delete 0.5% of the packages until we failed to produce the
|
||||||
|
# expected result 20 consecutive times
|
||||||
|
my $num_failures = 0;
|
||||||
|
while (1) {
|
||||||
|
# fill an array with the remaining enabled indices
|
||||||
|
my @indices = ();
|
||||||
|
my $numpkgs = scalar keys %{$enabled};
|
||||||
|
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||||
|
if (!$enabled->{$id_to_key->{$i}}) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
push @indices, $i;
|
||||||
|
}
|
||||||
|
my @shuffled_indices = shuffle(@indices);
|
||||||
|
printf "1st phase: %d/%d\n", scalar @indices, $numpkgs;
|
||||||
|
my $tenpercent = (scalar @indices)*0.005;
|
||||||
|
my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ];
|
||||||
|
my $num_to_disable = scalar @to_disable;
|
||||||
|
print "disabling: $num_to_disable\n";
|
||||||
|
print "acc. failures: $num_failures\n";
|
||||||
|
for my $i (@to_disable) {
|
||||||
|
$enabled->{$id_to_key->{$i}} = 0;
|
||||||
|
}
|
||||||
|
write_enabled_pkgs($index, $enabled);
|
||||||
|
if (check()) {
|
||||||
|
# the error we want to see is still there so we keep these keys
|
||||||
|
# disabled and reset the number of failures
|
||||||
|
$num_failures = 0;
|
||||||
|
} else {
|
||||||
|
# the error vanished
|
||||||
|
# re-enable the formerly disabled ones and increment the number of
|
||||||
|
# failures
|
||||||
|
for my $i (@to_disable) {
|
||||||
|
$enabled->{$id_to_key->{$i}} = 1;
|
||||||
|
}
|
||||||
|
$num_failures += 1;
|
||||||
|
}
|
||||||
|
# if we have unsuccessfully tried running the check as many times as
|
||||||
|
# the current percentage is removing at once, then it starts making
|
||||||
|
# sense to remove packages one-by-one
|
||||||
|
#
|
||||||
|
# In other words: It does not make sense to remove X packages at once
|
||||||
|
# if we are are running check() more than X times.
|
||||||
|
#
|
||||||
|
# We add the factor 2 because the threshold might be reached randomly
|
||||||
|
# as well.
|
||||||
|
if ($num_failures > $num_to_disable*2) {
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub iterate_pkgs {
|
||||||
|
my $index = shift;
|
||||||
|
my $enabled = shift;
|
||||||
|
my $id_to_key = shift;
|
||||||
|
# we extract the array of enabled indices for better progress reporting
|
||||||
|
# later on
|
||||||
|
my $numpkgs = scalar keys %{$enabled};
|
||||||
|
my @indices = ();
|
||||||
|
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||||
|
if (!$enabled->{$id_to_key->{$i}}) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
push @indices, $i;
|
||||||
|
}
|
||||||
|
|
||||||
|
# now delete single packages
|
||||||
|
# we need this stage to make sure to have looked at all packages at least
|
||||||
|
# once. Otherwise it can be that by chance a package was not randomly
|
||||||
|
# selected by the last stage.
|
||||||
|
my $i = 0;
|
||||||
|
foreach my $idx (@indices) {
|
||||||
|
my $key = $id_to_key->{$idx};
|
||||||
|
printf "2nd phase: %d/%d\n", $i, scalar @indices;
|
||||||
|
$i += 1;
|
||||||
|
# disable the current key
|
||||||
|
$enabled->{$key} = 0;
|
||||||
|
write_enabled_pkgs($index, $enabled);
|
||||||
|
if (check()) {
|
||||||
|
# the error we want to see is still there so we keep this key disabled
|
||||||
|
} else {
|
||||||
|
# the error vanished
|
||||||
|
# re-enable the current key
|
||||||
|
$enabled->{$key} = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub delete_field {
|
||||||
|
my $index = shift;
|
||||||
|
my $enabled = shift;
|
||||||
|
my $id_to_key = shift;
|
||||||
|
my $field = shift;
|
||||||
|
# we extract the array of enabled indices for better progress reporting
|
||||||
|
# later on
|
||||||
|
my $numpkgs = scalar keys %{$enabled};
|
||||||
|
my @indices = ();
|
||||||
|
for (my $i = 0; $i < $numpkgs; $i++) {
|
||||||
|
if (!$enabled->{$id_to_key->{$i}}) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
push @indices, $i;
|
||||||
|
}
|
||||||
|
|
||||||
|
# now delete single packages
|
||||||
|
# we need this stage to make sure to have looked at all packages at least
|
||||||
|
# once. Otherwise it can be that by chance a package was not randomly
|
||||||
|
# selected by the last stage.
|
||||||
|
my $i = 0;
|
||||||
|
foreach my $idx (@indices) {
|
||||||
|
my $key = $id_to_key->{$idx};
|
||||||
|
printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices;
|
||||||
|
$i += 1;
|
||||||
|
# skip if this package doesn't have such a key
|
||||||
|
my $pkg = $index->get_by_key($key);
|
||||||
|
if (!exists $pkg->{$field}) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
my $fieldvalue = $pkg->{$field};
|
||||||
|
# disable the current key
|
||||||
|
delete $pkg->{$field};
|
||||||
|
write_enabled_pkgs($index, $enabled);
|
||||||
|
if (check()) {
|
||||||
|
# the error we want to see is still there so we keep this field deleted
|
||||||
|
} else {
|
||||||
|
# the error vanished
|
||||||
|
# restore the package
|
||||||
|
$index->get_by_key($key)->{$field} = $fieldvalue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub main {
|
||||||
|
my $desc = $ARGV[0];
|
||||||
|
if (not defined($desc)) {
|
||||||
|
die "need a Packages file";
|
||||||
|
}
|
||||||
|
|
||||||
|
my $key_func = sub {
|
||||||
|
return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture};
|
||||||
|
};
|
||||||
|
|
||||||
|
my $index = Dpkg::Index->new(get_key_func=>$key_func);
|
||||||
|
|
||||||
|
$index->load($desc);
|
||||||
|
my %allowed_fields = (
|
||||||
|
'Version' => 1,
|
||||||
|
'Package' => 1,
|
||||||
|
'Architecture' => 1,
|
||||||
|
'Depends' => 1,
|
||||||
|
# 'Multi-Arch' => 1, # not needed with only one architecture
|
||||||
|
'Provides' => 1,
|
||||||
|
'Breaks' => 1,
|
||||||
|
'Conflicts' => 1,
|
||||||
|
'Pre-Depends' => 1,
|
||||||
|
# 'Essential' => 1, # probably not needed
|
||||||
|
);
|
||||||
|
|
||||||
|
my %enabled = ();
|
||||||
|
my %id_to_key = ();
|
||||||
|
{
|
||||||
|
my $i = 0;
|
||||||
|
foreach my $key ($index->get_keys()) {
|
||||||
|
$enabled{$key} = 1;
|
||||||
|
$id_to_key{$i} = $key;
|
||||||
|
$i += 1;
|
||||||
|
# delete unnecessary fields
|
||||||
|
my $cdata = $index->get_by_key($key);
|
||||||
|
foreach my $field (keys %{$cdata}) {
|
||||||
|
if (! exists $allowed_fields{$field}) {
|
||||||
|
delete $cdata->{$field};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
randomize_pkgs($index, \%enabled, \%id_to_key);
|
||||||
|
iterate_pkgs($index, \%enabled, \%id_to_key);
|
||||||
|
delete_field($index, \%enabled, \%id_to_key, 'Depends');
|
||||||
|
delete_field($index, \%enabled, \%id_to_key, 'Pre-Depends');
|
||||||
|
delete_field($index, \%enabled, \%id_to_key, 'Conflicts');
|
||||||
|
delete_field($index, \%enabled, \%id_to_key, 'Breaks');
|
||||||
|
delete_field($index, \%enabled, \%id_to_key, 'Provides');
|
||||||
|
# somehow, calling randomize_pkgs might remove build-essential
|
||||||
|
#randomize_pkgs($index, \%enabled, \%id_to_key);
|
||||||
|
iterate_pkgs($index, \%enabled, \%id_to_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
Loading…
Reference in a new issue