initial commit

This commit is contained in:
Johannes 'josch' Schauer 2016-07-06 14:29:18 +02:00
commit 197995a019

248
shrink.pl Executable file
View file

@ -0,0 +1,248 @@
#!/usr/bin/perl
# this program uses Dpkg::Index to randomly remove packages from the index
# until a minimal test case is found.
#
# Another way to solve this problem is:
#
# 09:02 < helmut> josch: a tool that has worked well for me for a number of
# problems was rolling an exponentially distributed number of
# lines from a random point. the lambda parameter was adapted
# with the success.
# 09:03 < helmut> josch: after finding a minimal set of lines, a
# language-specific tool can explode lines. for c that can be
# almost as simple as replacing space with newlines. then start
# over
# 09:04 < helmut> removing a sequence of lines has the benefit of removing
# whole functions/paragraphs/etc. and thus producing
# syntactically valid stuff occasionally
# 09:06 < helmut> I recommend that you spend a bit of time looking into haskell
# quickcheck's Arbitrary type class. it has a shrink member,
# which essentially does the same thing on haskell data
# structures
# 09:07 < helmut> drawing inspiration from there might help
use strict;
use warnings;
use Dpkg::Index;
use List::Util 'shuffle';
sub check {
system 'dose-builddebcheck', '--failures', '--explain', '--quiet', '--deb-native-arch=kfreebsd-i386', 'tmp', 'wanna-build-interesting-sources-sid.kfreebsd-i386';
my $exit = $? >> 8;
if ($exit == 64) {
return 1;
} else {
return 0;
}
}
sub write_enabled_pkgs {
my $index = shift;
my $enabled = shift;
# write out all the still enabled packages
open(my $fh, '>', 'tmp');
foreach my $k ($index->get_keys()) {
if (!$enabled->{$k}) {
next;
}
print { $fh } $index->get_by_key($k) . "\n";
}
close $fh;
}
sub randomize_pkgs {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
# randomly delete 0.5% of the packages until we failed to produce the
# expected result 20 consecutive times
my $num_failures = 0;
while (1) {
# fill an array with the remaining enabled indices
my @indices = ();
my $numpkgs = scalar keys %{$enabled};
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
my @shuffled_indices = shuffle(@indices);
printf "1st phase: %d/%d\n", scalar @indices, $numpkgs;
my $tenpercent = (scalar @indices)*0.005;
my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ];
my $num_to_disable = scalar @to_disable;
print "disabling: $num_to_disable\n";
print "acc. failures: $num_failures\n";
for my $i (@to_disable) {
$enabled->{$id_to_key->{$i}} = 0;
}
write_enabled_pkgs($index, $enabled);
if (check()) {
# the error we want to see is still there so we keep these keys
# disabled and reset the number of failures
$num_failures = 0;
} else {
# the error vanished
# re-enable the formerly disabled ones and increment the number of
# failures
for my $i (@to_disable) {
$enabled->{$id_to_key->{$i}} = 1;
}
$num_failures += 1;
}
# if we have unsuccessfully tried running the check as many times as
# the current percentage is removing at once, then it starts making
# sense to remove packages one-by-one
#
# In other words: It does not make sense to remove X packages at once
# if we are are running check() more than X times.
#
# We add the factor 2 because the threshold might be reached randomly
# as well.
if ($num_failures > $num_to_disable*2) {
last;
}
}
}
sub iterate_pkgs {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
# we extract the array of enabled indices for better progress reporting
# later on
my $numpkgs = scalar keys %{$enabled};
my @indices = ();
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
# now delete single packages
# we need this stage to make sure to have looked at all packages at least
# once. Otherwise it can be that by chance a package was not randomly
# selected by the last stage.
my $i = 0;
foreach my $idx (@indices) {
my $key = $id_to_key->{$idx};
printf "2nd phase: %d/%d\n", $i, scalar @indices;
$i += 1;
# disable the current key
$enabled->{$key} = 0;
write_enabled_pkgs($index, $enabled);
if (check()) {
# the error we want to see is still there so we keep this key disabled
} else {
# the error vanished
# re-enable the current key
$enabled->{$key} = 1;
}
}
}
sub delete_field {
my $index = shift;
my $enabled = shift;
my $id_to_key = shift;
my $field = shift;
# we extract the array of enabled indices for better progress reporting
# later on
my $numpkgs = scalar keys %{$enabled};
my @indices = ();
for (my $i = 0; $i < $numpkgs; $i++) {
if (!$enabled->{$id_to_key->{$i}}) {
next;
}
push @indices, $i;
}
# now delete single packages
# we need this stage to make sure to have looked at all packages at least
# once. Otherwise it can be that by chance a package was not randomly
# selected by the last stage.
my $i = 0;
foreach my $idx (@indices) {
my $key = $id_to_key->{$idx};
printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices;
$i += 1;
# skip if this package doesn't have such a key
my $pkg = $index->get_by_key($key);
if (!exists $pkg->{$field}) {
next;
}
my $fieldvalue = $pkg->{$field};
# disable the current key
delete $pkg->{$field};
write_enabled_pkgs($index, $enabled);
if (check()) {
# the error we want to see is still there so we keep this field deleted
} else {
# the error vanished
# restore the package
$index->get_by_key($key)->{$field} = $fieldvalue;
}
}
}
sub main {
my $desc = $ARGV[0];
if (not defined($desc)) {
die "need a Packages file";
}
my $key_func = sub {
return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture};
};
my $index = Dpkg::Index->new(get_key_func=>$key_func);
$index->load($desc);
my %allowed_fields = (
'Version' => 1,
'Package' => 1,
'Architecture' => 1,
'Depends' => 1,
# 'Multi-Arch' => 1, # not needed with only one architecture
'Provides' => 1,
'Breaks' => 1,
'Conflicts' => 1,
'Pre-Depends' => 1,
# 'Essential' => 1, # probably not needed
);
my %enabled = ();
my %id_to_key = ();
{
my $i = 0;
foreach my $key ($index->get_keys()) {
$enabled{$key} = 1;
$id_to_key{$i} = $key;
$i += 1;
# delete unnecessary fields
my $cdata = $index->get_by_key($key);
foreach my $field (keys %{$cdata}) {
if (! exists $allowed_fields{$field}) {
delete $cdata->{$field};
}
}
}
}
randomize_pkgs($index, \%enabled, \%id_to_key);
iterate_pkgs($index, \%enabled, \%id_to_key);
delete_field($index, \%enabled, \%id_to_key, 'Depends');
delete_field($index, \%enabled, \%id_to_key, 'Pre-Depends');
delete_field($index, \%enabled, \%id_to_key, 'Conflicts');
delete_field($index, \%enabled, \%id_to_key, 'Breaks');
delete_field($index, \%enabled, \%id_to_key, 'Provides');
# somehow, calling randomize_pkgs might remove build-essential
#randomize_pkgs($index, \%enabled, \%id_to_key);
iterate_pkgs($index, \%enabled, \%id_to_key);
}
main();