From 197995a019aa2544a28d3fe3b1cd7501720b5f3f Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Wed, 6 Jul 2016 14:29:18 +0200 Subject: [PATCH] initial commit --- shrink.pl | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100755 shrink.pl diff --git a/shrink.pl b/shrink.pl new file mode 100755 index 0000000..deba89b --- /dev/null +++ b/shrink.pl @@ -0,0 +1,248 @@ +#!/usr/bin/perl + +# this program uses Dpkg::Index to randomly remove packages from the index +# until a minimal test case is found. +# +# Another way to solve this problem is: +# +# 09:02 < helmut> josch: a tool that has worked well for me for a number of +# problems was rolling an exponentially distributed number of +# lines from a random point. the lambda parameter was adapted +# with the success. +# 09:03 < helmut> josch: after finding a minimal set of lines, a +# language-specific tool can explode lines. for c that can be +# almost as simple as replacing space with newlines. then start +# over +# 09:04 < helmut> removing a sequence of lines has the benefit of removing +# whole functions/paragraphs/etc. and thus producing +# syntactically valid stuff occasionally +# 09:06 < helmut> I recommend that you spend a bit of time looking into haskell +# quickcheck's Arbitrary type class. it has a shrink member, +# which essentially does the same thing on haskell data +# structures +# 09:07 < helmut> drawing inspiration from there might help + +use strict; +use warnings; + +use Dpkg::Index; +use List::Util 'shuffle'; + +sub check { + system 'dose-builddebcheck', '--failures', '--explain', '--quiet', '--deb-native-arch=kfreebsd-i386', 'tmp', 'wanna-build-interesting-sources-sid.kfreebsd-i386'; + my $exit = $? >> 8; + if ($exit == 64) { + return 1; + } else { + return 0; + } +} + +sub write_enabled_pkgs { + my $index = shift; + my $enabled = shift; + # write out all the still enabled packages + open(my $fh, '>', 'tmp'); + foreach my $k ($index->get_keys()) { + if (!$enabled->{$k}) { + next; + } + print { $fh } $index->get_by_key($k) . "\n"; + } + close $fh; +} + +sub randomize_pkgs { + my $index = shift; + my $enabled = shift; + my $id_to_key = shift; + # randomly delete 0.5% of the packages until we failed to produce the + # expected result 20 consecutive times + my $num_failures = 0; + while (1) { + # fill an array with the remaining enabled indices + my @indices = (); + my $numpkgs = scalar keys %{$enabled}; + for (my $i = 0; $i < $numpkgs; $i++) { + if (!$enabled->{$id_to_key->{$i}}) { + next; + } + push @indices, $i; + } + my @shuffled_indices = shuffle(@indices); + printf "1st phase: %d/%d\n", scalar @indices, $numpkgs; + my $tenpercent = (scalar @indices)*0.005; + my @to_disable = @shuffled_indices[ 0 .. $tenpercent - 1 ]; + my $num_to_disable = scalar @to_disable; + print "disabling: $num_to_disable\n"; + print "acc. failures: $num_failures\n"; + for my $i (@to_disable) { + $enabled->{$id_to_key->{$i}} = 0; + } + write_enabled_pkgs($index, $enabled); + if (check()) { + # the error we want to see is still there so we keep these keys + # disabled and reset the number of failures + $num_failures = 0; + } else { + # the error vanished + # re-enable the formerly disabled ones and increment the number of + # failures + for my $i (@to_disable) { + $enabled->{$id_to_key->{$i}} = 1; + } + $num_failures += 1; + } + # if we have unsuccessfully tried running the check as many times as + # the current percentage is removing at once, then it starts making + # sense to remove packages one-by-one + # + # In other words: It does not make sense to remove X packages at once + # if we are are running check() more than X times. + # + # We add the factor 2 because the threshold might be reached randomly + # as well. + if ($num_failures > $num_to_disable*2) { + last; + } + } +} + +sub iterate_pkgs { + my $index = shift; + my $enabled = shift; + my $id_to_key = shift; + # we extract the array of enabled indices for better progress reporting + # later on + my $numpkgs = scalar keys %{$enabled}; + my @indices = (); + for (my $i = 0; $i < $numpkgs; $i++) { + if (!$enabled->{$id_to_key->{$i}}) { + next; + } + push @indices, $i; + } + + # now delete single packages + # we need this stage to make sure to have looked at all packages at least + # once. Otherwise it can be that by chance a package was not randomly + # selected by the last stage. + my $i = 0; + foreach my $idx (@indices) { + my $key = $id_to_key->{$idx}; + printf "2nd phase: %d/%d\n", $i, scalar @indices; + $i += 1; + # disable the current key + $enabled->{$key} = 0; + write_enabled_pkgs($index, $enabled); + if (check()) { + # the error we want to see is still there so we keep this key disabled + } else { + # the error vanished + # re-enable the current key + $enabled->{$key} = 1; + } + } +} + +sub delete_field { + my $index = shift; + my $enabled = shift; + my $id_to_key = shift; + my $field = shift; + # we extract the array of enabled indices for better progress reporting + # later on + my $numpkgs = scalar keys %{$enabled}; + my @indices = (); + for (my $i = 0; $i < $numpkgs; $i++) { + if (!$enabled->{$id_to_key->{$i}}) { + next; + } + push @indices, $i; + } + + # now delete single packages + # we need this stage to make sure to have looked at all packages at least + # once. Otherwise it can be that by chance a package was not randomly + # selected by the last stage. + my $i = 0; + foreach my $idx (@indices) { + my $key = $id_to_key->{$idx}; + printf "3nd phase (deleting $field): %d/%d\n", $i, scalar @indices; + $i += 1; + # skip if this package doesn't have such a key + my $pkg = $index->get_by_key($key); + if (!exists $pkg->{$field}) { + next; + } + my $fieldvalue = $pkg->{$field}; + # disable the current key + delete $pkg->{$field}; + write_enabled_pkgs($index, $enabled); + if (check()) { + # the error we want to see is still there so we keep this field deleted + } else { + # the error vanished + # restore the package + $index->get_by_key($key)->{$field} = $fieldvalue; + } + } +} + +sub main { + my $desc = $ARGV[0]; + if (not defined($desc)) { + die "need a Packages file"; + } + + my $key_func = sub { + return $_[0]->{Package} . ' ' . $_[0]->{Version} . ' ' . $_[0]->{Architecture}; + }; + + my $index = Dpkg::Index->new(get_key_func=>$key_func); + + $index->load($desc); + my %allowed_fields = ( + 'Version' => 1, + 'Package' => 1, + 'Architecture' => 1, + 'Depends' => 1, + # 'Multi-Arch' => 1, # not needed with only one architecture + 'Provides' => 1, + 'Breaks' => 1, + 'Conflicts' => 1, + 'Pre-Depends' => 1, + # 'Essential' => 1, # probably not needed + ); + + my %enabled = (); + my %id_to_key = (); + { + my $i = 0; + foreach my $key ($index->get_keys()) { + $enabled{$key} = 1; + $id_to_key{$i} = $key; + $i += 1; + # delete unnecessary fields + my $cdata = $index->get_by_key($key); + foreach my $field (keys %{$cdata}) { + if (! exists $allowed_fields{$field}) { + delete $cdata->{$field}; + } + } + } + } + + randomize_pkgs($index, \%enabled, \%id_to_key); + iterate_pkgs($index, \%enabled, \%id_to_key); + delete_field($index, \%enabled, \%id_to_key, 'Depends'); + delete_field($index, \%enabled, \%id_to_key, 'Pre-Depends'); + delete_field($index, \%enabled, \%id_to_key, 'Conflicts'); + delete_field($index, \%enabled, \%id_to_key, 'Breaks'); + delete_field($index, \%enabled, \%id_to_key, 'Provides'); + # somehow, calling randomize_pkgs might remove build-essential + #randomize_pkgs($index, \%enabled, \%id_to_key); + iterate_pkgs($index, \%enabled, \%id_to_key); +} + +main();