This commit is contained in:
Johannes Schauer Marin Rodrigues 2023-12-30 22:27:50 +01:00
parent a3d8611d19
commit b2ad645561
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1
12 changed files with 369340 additions and 224 deletions

View file

@ -1,14 +1,15 @@
testinput: run.py
./run.py /var/lib/apt/lists/ftp.debian.org_debian_dists_sid_main_binary-amd64_Packages > testinput
.PHONY: all
# Run every implementation on the shuffled list versions.rand and require its
# output to be byte-identical to the ground truth in versions.sorted.
# The cmp pipeline is a statement of its own (not the left side of `&&`) so
# that `set -e` aborts the recipe on the first mismatch; as part of an AND
# list a failing cmp would be ignored and the loop would carry on.
all: versions.rand versions.sorted testdpkg testapt testdose
	set -e; for t in testapt.py testdpkg.pl testdpkg testapt testdose; do \
		printf "running $$t..." >&2; \
		./$$t < versions.rand | cmp versions.sorted -; \
		echo OK >&2; \
	done
testinput.1000: run.py
./run.py /var/lib/apt/lists/ftp.debian.org_debian_dists_sid_main_binary-amd64_Packages | awk 'NR % 1000 == 1' > testinput.1000
testinput.unequal: testinput inequalityfilter
./inequalityfilter < testinput > testinput.unequal
testinput.unequal.1000: testinput inequalityfilter
./inequalityfilter < testinput | awk 'NR % 1000 == 1' > testinput.unequal.1000
# Shuffle versions.sorted into versions.rand in a reproducible way.
# openssl encrypts /dev/zero with AES-256-CTR using a fixed passphrase and no
# salt; that byte stream is handed to `sort --random-sort` as its random
# source. File descriptor 4 keeps a copy of the original stdin, descriptor 3
# carries the openssl output into sort (read via /proc/self/fd/3), and tr puts
# space-separated versions from versions.sorted onto separate lines first.
versions.rand: versions.sorted
# create a randomized version list using a reproducible seed value
{ openssl enc -aes-256-ctr -pass pass:1612543740 -nosalt </dev/zero 2>/dev/null 4<&- | { \
tr ' ' '\n' <versions.sorted 3<&- | sort --random-source=/proc/self/fd/3 --random-sort; \
} 3<&0 <&4 4<&-; } 4<&0 > versions.rand
testdpkg: testdpkg.c
gcc testdpkg.c -ldpkg -o testdpkg
@ -16,34 +17,25 @@ testdpkg: testdpkg.c
testapt: testapt.cc
g++ testapt.cc -lapt-pkg -o testapt
testdose: testdose.ml
ocamlfind ocamlc -package dose3 -linkpkg testdose.ml -o testdose
# Build the OCaml test program; listing testdose.ml as a prerequisite makes
# make rebuild the binary whenever the source changes.
testdose: testdose.ml
	ocamlfind ocamlc -package dose3.versioning -linkpkg testdose.ml -o testdose
inequalityfilter: inequalityfilter.cc
g++ inequalityfilter.cc -lapt-pkg -o inequalityfilter
.PHONY: test
test: testinput testdpkg testapt testdose rundpkg.sh
./rundpkg.sh < testinput | md5sum
./testapt < testinput | md5sum
./testdose < testinput | md5sum
.PHONY: test.1000
test.1000: testinput.1000 testdpkg testapt testdose rundpkg.sh
./rundpkg.sh < testinput.1000 | md5sum
./testapt < testinput.1000 | md5sum
./testdose < testinput.1000 | md5sum
.PHONY: testsort
testsort: testinput.unequal testapt testsort.sh
./testapt < testinput.unequal | md5sum
./testsort.sh < testinput.unequal | md5sum
.PHONY: testsort.1000
testsort.1000: testinput.unequal.1000 testapt testsort.sh
./testapt < testinput.unequal.1000 | md5sum
./testsort.sh < testinput.unequal.1000 | md5sum
# Regenerate the ground-truth file versions.sorted.
# Downloads six SQLite databases from metasnap.debian.net, selects every
# `name` from their `vers` tables, de-duplicates the result with `sort -u`,
# keeps only entries starting with a digit, pipes them through ./testapt.py
# and finally removes the downloaded databases again.
versions.sorted:
wget https://metasnap.debian.net/by-package/debian.sqlite3
wget https://metasnap.debian.net/by-package/debian-backports.sqlite3
wget https://metasnap.debian.net/by-package/debian-debug.sqlite3
wget https://metasnap.debian.net/by-package/debian-ports.sqlite3
wget https://metasnap.debian.net/by-package/debian-security.sqlite3
wget https://metasnap.debian.net/by-package/debian-volatile.sqlite3
{ sqlite3 debian.sqlite3 "select name from vers"; \
sqlite3 debian-backports.sqlite3 "select name from vers"; \
sqlite3 debian-debug.sqlite3 "select name from vers"; \
sqlite3 debian-ports.sqlite3 "select name from vers"; \
sqlite3 debian-security.sqlite3 "select name from vers"; \
sqlite3 debian-volatile.sqlite3 "select name from vers"; \
} | sort -u | grep '^[0-9]' | ./testapt.py > versions.sorted
rm debian.sqlite3 debian-backports.sqlite3 debian-debug.sqlite3 debian-ports.sqlite3 debian-security.sqlite3 debian-volatile.sqlite3
.PHONY: clean
clean:
rm -f testapt testdose testdose.cmi testdose.cmo testdpkg testinput inequalityfilter
rm -f testapt testdose testdose.cmi testdose.cmo testdpkg versions.rand

View file

@ -1,18 +1,23 @@
This is to test the several implementations of Debian version comparison
algorithms.
Comparing Debian versions correctly is hard. This project allows comparing
existing implementations by sorting a text file containing all Debian versions
that have been used since 2005.
Run it via `make test`.
To run it, make sure you have the following packages installed:
`run.py` generates a file containing all permutations of pairs of versions. The
different programs then check whether versions are greater than, equal or less
than one another and output >, = and <, respectively in order. It is then
checked whether the output of all implementations is the same.
$ sudo apt install build-essential libdpkg-dev libapt-pkg-dev libextlib-ocaml-dev ocaml-findlib ocaml libdose3-ocaml-dev
Currently, this checks the libdpkg C implementation, libapt and dose3.
Then execute:
$ make
The following parsers are currently tested:
- dpkg (C and Perl)
- apt (C++ and Python)
- dose3 (OCaml)
Additionally, the following parsers could be added:
- libdpkg-perl
- libben-ocaml (in lib/benl_base.ml and lib/benl_dpkg.c)
The following do not use their own parser:
@ -21,3 +26,9 @@ The following do not use their own parser:
- libghc-dpkg-dev uses libdpkg-dev
- ruby-debian uses libapt-pkg-dev
- dpkg-dev-el uses `dpkg --compare-versions`
Each of the scripts parses a list of versions coming from standard input, one
per line, and writes them in sorted ascending order to standard output, one per
line. Versions that compare as equal are written on the same line,
lexicographically sorted and separated by space. The ground truth output is
stored in the file `versions.sorted`.

View file

@ -1,41 +0,0 @@
#include <apt-pkg/debversion.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Filter for tab-separated version pairs: reads "ver1<TAB>ver2" lines from
// stdin and echoes to stdout only those lines whose two versions do not
// compare as equal according to debVS.CmpVersion().
// Exits with EXIT_FAILURE if a line cannot be split or memory runs out.
int main()
{
    char *line = NULL;
    size_t len = 0;
    // getline() returns ssize_t; -1 signals EOF or error
    ssize_t read;
    while ((read = getline(&line, &len, stdin)) != -1) {
        // strsep() writes into its buffer, so split a copy and keep `line`
        // intact for echoing it unchanged
        char *copy = strdup(line);
        if (copy == NULL) {
            perror("strdup failed");
            exit(EXIT_FAILURE);
        }
        char *token = copy;
        char *ver1 = strsep(&token, "\t");
        if (ver1 == NULL) {
            fprintf(stderr, "cannot read token1");
            exit(EXIT_FAILURE);
        }
        char *ver2 = strsep(&token, "\n");
        if (ver2 == NULL) {
            fprintf(stderr, "cannot read token2");
            exit(EXIT_FAILURE);
        }
        if (debVS.CmpVersion(ver1, ver2) != 0) {
            fputs(line, stdout);
        }
        free(copy);
    }
    // getline() allocated the buffer; release it
    free(line);
    exit(EXIT_SUCCESS);
}

22
run.py
View file

@ -1,22 +0,0 @@
#!/usr/bin/env python
from __future__ import print_function
import deb822
import sys
# Gather every distinct, non-empty Version field of the package list given as
# the first command line argument.
versions = set()
with open(sys.argv[1]) as f:
    for paragraph in deb822.Deb822.iter_paragraphs(f):
        field = paragraph.get('Version')
        if not field:
            continue
        versions.add(field)

# Emit each unordered pair exactly once: every version is paired with all the
# versions that follow it in sorted order. Progress goes to stderr.
versions = sorted(versions)
total = len(versions)
for index, first in enumerate(versions):
    sys.stderr.write("%f %%\r" % ((index * 100.0) / total))
    for second in versions[index + 1:]:
        sys.stdout.write("%s\t%s\n" % (first, second))

View file

@ -1,3 +0,0 @@
#!/bin/sh
# Feed standard input to ./testdpkg in chunks of 1000000 lines: split(1) runs
# the --filter command once per chunk, so no single ./testdpkg process has to
# read the whole input.
# we need to do this because of bug#47214
split --lines=1000000 --numeric-suffixes --suffix-length=4 --filter='./testdpkg'

View file

@ -3,47 +3,44 @@
#include <stdlib.h>
#include <string.h>
// qsort() comparator over an array of C strings: orders by
// debVS.CmpVersion() and breaks ties between versions that compare as equal
// with plain strcmp(), so the overall order is deterministic.
int cmpver(const void *a, const void *b) {
    const char *lhs = *(const char **) a;
    const char *rhs = *(const char **) b;
    int order = debVS.CmpVersion(lhs, rhs);
    if (order != 0) {
        return order;
    }
    return strcmp(lhs, rhs);
}
int main()
{
char *line = NULL;
char *token;
char *orig1;
char *orig2;
char *ver1;
char *ver2;
size_t len = 0;
size_t read;
int ret;
char **versions = NULL;
size_t num_versions = 0;
while ((read = getline(&line, &len, stdin)) != -1) {
//fprintf(stderr, "%s", line);
orig1 = strdup(line);
orig2 = strdup(line);
token = orig1;
ver1 = strsep(&token, "\t");
if (ver1 == NULL) {
fprintf(stderr, "cannot read token1");
exit(EXIT_FAILURE);
}
ver2 = strsep(&token, "\n");
if (ver2 == NULL) {
fprintf(stderr, "cannot read token2");
exit(EXIT_FAILURE);
}
ret = debVS.CmpVersion(ver1, ver2);
if (ret == 0) {
fputc('=', stdout);
} else if (ret > 0) {
fputc('>', stdout);
} else {
fputc('<', stdout);
}
/*
fputc(' ', stdout);
fputs(orig2, stdout);
*/
//fputs(orig2, stdout);
free(orig1);
free(orig2);
}
num_versions += 1;
versions = (char **)realloc(versions, sizeof(char *)*num_versions);
if (versions == NULL) {
perror("malloc failed");
exit(EXIT_FAILURE);
}
if (line[read-1] == '\n') {
line[read-1] = '\0';
}
versions[num_versions-1] = strdup(line);
}
qsort(versions, num_versions, sizeof(char *), cmpver);
printf(versions[0]);
for (int i = 1; i < num_versions; i++) {
if (debVS.CmpVersion(versions[i-1], versions[i]) == 0) {
printf(" ");
} else {
printf("\n");
}
printf(versions[i]);
}
printf("\n");
exit(EXIT_SUCCESS);
}

29
testapt.py Executable file
View file

@ -0,0 +1,29 @@
#!/usr/bin/env python3
import sys
from functools import cmp_to_key
import apt_pkg
apt_pkg.init_system()
def myvercomp(a, b):
    """Three-way comparison of two version strings for use with cmp_to_key.

    Orders by apt_pkg.version_compare(); versions that apt considers equal
    are ordered by plain string comparison so the result is deterministic.
    Returns a negative number, zero or a positive number.
    """
    ret = apt_pkg.version_compare(a, b)
    if ret == 0:
        # three-way string comparison; yields 0 for identical strings so the
        # comparator stays antisymmetric
        return (a > b) - (a < b)
    return ret
# Read one version per line, sort with myvercomp and print in ascending order.
# Versions that apt considers equal are printed on the same line, separated by
# a single space. Empty input produces no output instead of an IndexError.
versions = [line.rstrip("\n") for line in sys.stdin]
versions.sort(key=cmp_to_key(myvercomp))
if versions:
    sys.stdout.write(versions[0])
    # walk neighbouring pairs: equal to its predecessor -> same output line
    for previous, current in zip(versions, versions[1:]):
        if apt_pkg.version_compare(previous, current) == 0:
            sys.stdout.write(" ")
        else:
            sys.stdout.write("\n")
        sys.stdout.write(current)
    sys.stdout.write("\n")

View file

@ -1,12 +1,17 @@
open Debian
open ExtLib
(* Reads one version per line from stdin, sorts them and prints them in
   ascending order. Versions that Dose_versioning.Debian.compare considers
   equal are ordered by plain string comparison and printed on the same line,
   separated by a single space. Empty input produces no output. *)
let main () =
  let by_version a b =
    match Dose_versioning.Debian.compare a b with
    | 0 -> compare a b
    | ret -> ret
  in
  match List.sort ~cmp:by_version (Std.input_list stdin) with
  | [] -> ()
  | first :: rest ->
    print_string first;
    (* the accumulator is the previously printed version *)
    ignore (List.fold_left (fun prev cur ->
      print_char (if Dose_versioning.Debian.compare prev cur = 0 then ' ' else '\n');
      print_string cur;
      cur
    ) first rest);
    print_char '\n'
;;
main ();;

View file

@ -1,57 +1,62 @@
#define LIBDPKG_VOLATILE_API 1
#define _GNU_SOURCE
#include <dpkg/dpkg.h>
#include <dpkg/dpkg-db.h>
#include <dpkg/version.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
// we need to keep track of the original string because some version strings
// include the implicit epoch of zero
// One input line: the parsed version used for comparing plus the verbatim
// text used for tie-breaking and for output.
struct myver {
struct dpkg_version dv; // parsed form, filled in by parseversion()
char *sv; // strdup()ed copy of the input line
};
int cmpver(const void *a, const void *b) {
int ret = dpkg_version_compare(&(((struct myver *)a)->dv), &(((struct myver *)b)->dv));
if (ret == 0) {
return strcmp(((struct myver *)a)->sv, ((struct myver *)b)->sv);
}
return ret;
}
int main()
{
char *line = NULL;
char *token;
char *orig1;
char *orig2;
char *ver1;
char *ver2;
size_t len = 0;
size_t read;
struct dpkg_version a, b;
int ret;
struct dpkg_version a;
struct myver *versions = NULL;
size_t num_versions = 0;
while ((read = getline(&line, &len, stdin)) != -1) {
//fprintf(stderr, "%s", line);
orig1 = strdup(line);
orig2 = strdup(line);
token = orig1;
ver1 = strsep(&token, "\t");
if (ver1 == NULL) {
fprintf(stderr, "cannot read token1");
exit(EXIT_FAILURE);
}
if(parseversion(&a, ver1, NULL)) {
fprintf(stderr, "cannot parse version1 %s\n", ver1);
exit(EXIT_FAILURE);
}
ver2 = strsep(&token, "\n");
if (ver2 == NULL) {
fprintf(stderr, "cannot read token2");
exit(EXIT_FAILURE);
}
if(parseversion(&b, ver2, NULL)) {
fprintf(stderr, "cannot parse version2 %s\n", ver2);
exit(EXIT_FAILURE);
}
ret = dpkg_version_compare(&a, &b);
if (ret == 0) {
fputc('=', stdout);
} else if (ret > 0) {
fputc('>', stdout);
} else {
fputc('<', stdout);
}
//fputs(orig2, stdout);
free(orig1);
free(orig2);
}
num_versions += 1;
versions = realloc(versions, sizeof(struct myver)*num_versions);
if (versions == NULL) {
perror("malloc failed");
exit(EXIT_FAILURE);
}
if (line[read-1] == '\n') {
line[read-1] = '\0';
}
if (parseversion(&((versions+num_versions-1)->dv), line, NULL)) {
fprintf(stderr, "cannot parse version: %s\n", line);
exit(EXIT_FAILURE);
}
(versions+num_versions-1)->sv = strdup(line);
}
qsort(versions, num_versions, sizeof(struct myver), cmpver);
printf(versions->sv);
for (int i = 1; i < num_versions; i++) {
if (dpkg_version_compare(&((versions+i-1)->dv), &((versions+i)->dv)) == 0) {
printf(" ");
} else {
printf("\n");
}
printf((versions+i)->sv);
}
printf("\n");
exit(EXIT_SUCCESS);
}

41
testdpkg.pl Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/perl
use strict;
use warnings;
use Dpkg::Version;
# Read one version per line from the files named on the command line (or from
# standard input) and keep each as a [ Dpkg::Version object, original text ]
# pair. Dies on the first line that is not a valid version.
my @versions;
while ( defined( my $line = <> ) ) {
    chomp $line;
    my $parsed = Dpkg::Version->new( $line, check => 1 );
    if ( !defined $parsed ) {
        die "not a valid version: $line";
    }
    push @versions, [ $parsed, $line ];
}
# Sort comparator over [ version, original text ] pairs: orders by the
# version objects and, when those compare as equal, by the original strings.
sub myvercomp {

    # Dpkg::Version overloads <=>
    my $by_version = $a->[0] <=> $b->[0];

    # only a tie falls back to string comparison
    return $by_version != 0 ? $by_version : $a->[1] cmp $b->[1];
}
# Print the versions in ascending order, one per line; a version that compares
# as equal to its predecessor is appended to the same line after a space.
my @sorted = sort myvercomp @versions;
my $prev   = shift @sorted;
print "$prev->[1]";
for my $cur (@sorted) {
    my $sep = ( $prev->[0] <=> $cur->[0] ) == 0 ? " " : "\n";
    print $sep;
    $prev = $cur;
    print "$cur->[1]";
}
print "\n";

View file

@ -1,29 +0,0 @@
#!/bin/sh
# some examples where `sort -V` does the wrong thing:
# 0.04-1-5 0.4-3
# 0.1+20080921-2 0.1-1+b8
# 0.1+dfsg-4 000.001-4
# 0.1.0+20071012-1.2 0.1.0-1.1
# 0.3.0+20091229-1 0.3.0-7.2
# 0.6.0+git20130305-5 0.6.0-5+b1
# 0:2009.10.04-1 1.0pre11-1
# 0:2009.10.04-1 1.240-1
# 0:2009.10.04-1 1.7.2.4-4.1
# 0:2009.10.04-1 2.3-12
# 0:2009.10.04-1 8.0.184.15484+dfsg-2
# 1.0+dfsg-1 1.00-6
# For every input line holding two whitespace-separated versions, print "<" if
# `sort -V` ranks the first one lower than the second and ">" otherwise,
# followed by both versions. A running line counter goes to stderr.
i=0
# -r: take the line literally, without backslash processing
while read -r line; do
	# pass the counter as an argument, never as the format string
	printf '%s\r' "$i" >&2
	i=$((i+1))
	# deliberately unquoted: word splitting separates the two versions
	set -- $line
	newest=$(printf '%s\n%s\n' "$1" "$2" | sort -V | tail -n1)
	if [ "$1" != "$newest" ]; then
		printf "<"
	else
		printf ">"
	fi
	printf " %s\t%s\n" "$1" "$2"
done

369131
versions.sorted Normal file

File diff suppressed because it is too large Load diff