restructure and integrate code

This commit is contained in:
josch 2012-09-28 10:32:06 +02:00
parent 27f3811300
commit a7e139ffb8
7 changed files with 172 additions and 22204 deletions

34
README
View file

@ -8,7 +8,7 @@ $ ./update-portdir.sh
At this point, you can already run the following script to get the list of At this point, you can already run the following script to get the list of
Debian source packages that can be mapped to Gentoo packages and also have Debian source packages that can be mapped to Gentoo packages and also have
dependencies that can be dropped in Gentoo: build dependencies that can be dropped in Gentoo:
$ ./find_reduced.py $ ./find_reduced.py
@ -17,37 +17,11 @@ It will print:
- a list of Debian packages that cannot be mapped to Gentoo packages - a list of Debian packages that cannot be mapped to Gentoo packages
- a list of Gentoo packages that cannot be mapped to Debian packages - a list of Gentoo packages that cannot be mapped to Debian packages
Above script depends on the following data files to be present: Overview:
portage - portage sourcecode portage - portage sourcecode
* retrieved as a git submodule * retrieved as a git submodule
portdir - database of ebuild files portdir - database of ebuild files
* retrieved by ./update-portdir.sh * retrieved by ./update-portdir.sh
reduced_gen_deps.list - Gentoo packages with reduced dependencies deb_source_pkgs.list - list of interesting Debian source packages
* generated by ./gentoo_dep_list.py deb2gen_mapping.list - mapping of Debian source packages to Gentoo packages
gen_source_pkgs.list - list of Gentoo source packages
* generated by ./gentoo_pkg_list_raw.sh
deb_source_pkgs.list - list of Debian source packages
* supplied by yourself
deb2gen_mapping.list - mapping of Debian to Gentoo packages
* created manually and/or with the help of
./find_pkg_matches.py
deb_source_pkgs.list is filled by the user. All other *.list files can be
regenerated by the following scripts:
$ ./gentoo_dep_list.py
It writes all source packages with reduced build dependencies plus the build
dependencies that can be dropped to reduced_gen_deps.list. Execution takes
about 2.5 hours.
$ ./gentoo_pkg_list_raw.sh
It writes to gen_source_pkgs.list a list of all available Gentoo packages.
$ ./find_pkg_matches.py
Inspects the current content of deb2gen_mapping.list and prints a list of
Debian packages that miss a mapping to Gentoo packages as well as a list of
suggestions of Gentoo packages that sound similar.

View file

@ -1,46 +0,0 @@
#!/usr/bin/env python
"""Suggest Gentoo package names for unmapped Debian source packages.

Reads ./deb2gen_mapping.list (tab separated "debian<TAB>gentoo" pairs),
./deb_source_pkgs.list and ./gen_source_pkgs.list. For every Debian source
package without a mapping it prints one line to stdout:

* "<debian>\t<gentoo>" when a Gentoo package has exactly that name,
* "<debian>\t<list of candidates>" when only substring matches were found,
* "<debian>" alone when nothing matched.
"""

import sys


def name_stem(name):
    """Return the distinctive part of a Debian source package name.

    The '-perl' suffix, the 'python-' prefix and the 'lib' prefix are removed
    (in that order), followed by any trailing dashes, dots and digits. The
    result may be the empty string, e.g. for "lib3".
    """
    # remove '-perl' suffix
    if name.endswith("-perl"):
        name = name[:-5]
    # remove 'python-' prefix
    if name.startswith("python-"):
        name = name[7:]
    # remove 'lib' prefix
    if name.startswith("lib"):
        name = name[3:]
    # remove dashes, dots and numbers from the end of the package name
    return name.rstrip(".-1234567890")


def main():
    """Print mapping suggestions for all Debian packages lacking one."""
    deb2gen = dict()
    with open("./deb2gen_mapping.list") as f:
        for line in f:
            line = line.strip()
            # tolerate blank lines instead of failing on the tuple unpacking
            if not line:
                continue
            d, g = line.split('\t')
            deb2gen[d] = g

    with open("./deb_source_pkgs.list") as f:
        # the first four characters of every line are cut off
        debian_names = [p[4:].strip() for p in f]

    with open("./gen_source_pkgs.list") as f:
        gentoo_names = [p.strip() for p in f]

    for orig in debian_names:
        # if mapping exists, continue
        if deb2gen.get(orig):
            continue

        # first try exact match against the part after the last '/'
        matches = [g for g in gentoo_names if orig == g.split('/')[-1]]
        if matches:
            print("%s\t%s" % (orig, matches[0]))
            continue

        # then try substring matches; an empty stem is a substring of every
        # name and would match all Gentoo packages, so it is skipped
        stem = name_stem(orig)
        if stem:
            matches = [g for g in gentoo_names
                       if stem in g.split('/')[-1].lower()]
            if matches:
                print("%s\t%s" % (orig, str(matches)))
                continue

        # we give up and print the debian source package name that was not matched
        print(orig)


if __name__ == "__main__":
    main()

View file

@ -1,13 +1,48 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
if len(sys.argv) != 2:
    print("usage: %s /path/to/Sources.bz" % sys.argv[0])
    # sys.argv[1] is only read after the long-running dependency scan, so
    # stop right away instead of failing at the very end.
    sys.exit(1)
import os
from os import path from os import path
from subprocess import Popen, PIPE
from functools import cmp_to_key
from datetime import datetime
import apt_pkg
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym") pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
sys.path.insert(0, pym_path) sys.path.insert(0, pym_path)
from portage.versions import pkgsplit from portage.versions import pkgcmp, pkgsplit
from portage.dep import use_reduce, Atom
from portage.exception import InvalidAtom
# given the starting time, the overall count and the current progress,
# return a string that shows the remaining time in hours/minutes/seconds
def estimate_remaining_time(before, count, i):
    """Return the estimated time left as an "HH:MM:SS" string.

    before -- datetime at which processing started
    count  -- overall number of items
    i      -- number of items processed so far

    Returns "n.a." while no estimate is possible, i.e. when less than one
    second has elapsed or no item has been processed yet.
    """
    elapsed = datetime.now() - before
    # elapsed time in whole seconds (microseconds are ignored)
    elapsed_secs = elapsed.days * 60 * 60 * 24 + elapsed.seconds
    # <= rather than ==: a wall clock that was stepped backwards yields a
    # negative elapsed time, which would otherwise give a negative speed
    if elapsed_secs <= 0 or i <= 0:
        return "n.a."
    speed = float(i) / elapsed_secs
    # never report a negative remaining time once i exceeds count
    remaining = max(0, int((count - i) / speed))
    hours, rest = divmod(remaining, 3600)
    minutes, seconds = divmod(rest, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)
# flatten an arbitrarily nested list
def flatten(l):
    """Yield the non-list items of an arbitrarily nested list, depth first.

    Only ``list`` instances are descended into; any other item (including
    tuples) is yielded unchanged. The argument is not modified.
    """
    # Stack of iterators, one per nesting level currently being traversed.
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # descend; the outer iterator resumes after the sublist
                pending.append(iter(item))
                break
            yield item
        else:
            # innermost level exhausted
            pending.pop()
# TODO: what about mapping to multiple packages?
deb2gen = dict() deb2gen = dict()
gen2deb = dict() gen2deb = dict()
with open("./deb2gen_mapping.list") as f: with open("./deb2gen_mapping.list") as f:
@ -19,32 +54,143 @@ with open("./deb2gen_mapping.list") as f:
with open("./deb_source_pkgs.list") as f: with open("./deb_source_pkgs.list") as f:
debian_names = [p[4:].strip() for p in f] debian_names = [p[4:].strip() for p in f]
gentoo_deps = dict() portdir = "./portdir"
with open("./reduced_gen_deps.list") as f:
for line in f:
p = line.strip().split()
gentoo_deps[pkgsplit(p[0])[0]] = p[1:]
missing_deb_mappings = list() pkgnames = list()
# traverse ./portdir to find .ebuild files that interest us
for cat in os.listdir(portdir):
catpath = os.path.join(portdir, cat)
if not os.path.isdir(catpath):
continue
for pkg in os.listdir(catpath):
pkgpath = os.path.join(catpath, pkg)
if not os.path.isdir(pkgpath):
continue
# get all .ebuild files
pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
if not pkgs:
continue
# only grab newest package
pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
deb = gen2deb.get("%s/%s"%(cat, pkg))
# assure that the package is relevant
if deb not in debian_names:
continue
pkgnames.append((cat, pkg, ver, rev))
sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
sys.stderr.write("\rGenerating list of packages... Done.\n")
count = 0
before = datetime.now()
debian_deps = dict()
missing_gen_mappings = list() missing_gen_mappings = list()
for d in debian_names: # look for reduced build dependencies
g = deb2gen.get(d) for i, (cat, pkg, ver, rev) in enumerate(pkgnames):
if g: if rev == "r0":
deps = gentoo_deps.get(g) pkgname = "%s/%s-%s"%(cat, pkg, ver)
if deps: else:
pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
r = p.communicate()
if p.returncode != 0:
sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
depend = r[0]
all_use = use_reduce(depend, matchall=True)
no_use = use_reduce(depend, matchnone=True)
l = list()
for a in all_use:
# filter out all dependencies that are also present without USE flags enabled
if a in no_use:
continue
if not isinstance(a, list) :
a = [a]
else:
# there are disjunctions but we better collect too many build dependencies than too few
a = flatten(a)
for p in a:
if p == "||":
continue
try:
l.append(Atom(p).cp)
except InvalidAtom:
sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(p)))
except TypeError:
sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(p)))
if l:
ddeps = [] ddeps = []
for p in deps: for p in l:
r = gen2deb.get(p) r = gen2deb.get(p)
if r:
ddeps.append(r) if not r:
else:
missing_gen_mappings.append(p) missing_gen_mappings.append(p)
ddeps = list(set(ddeps)) continue
if ddeps:
print d, ddeps ddeps.append(r)
else: debian_deps["%s/%s"%(cat,pkg)] = list(set(ddeps))
count +=1
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
sys.stdout.flush()
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
missing_deb_mappings = list()
src2bin = dict()
src_deps = dict()
for pkg in apt_pkg.TagFile(sys.argv[1]):
p = pkg['Package']
try:
src_deps[p] = [name for (name,ver,rel) in flatten(apt_pkg.parse_src_depends(pkg.get('Build-Depends', '')))]
except ValueError:
print "cannot parse depends line for %s: %s"%(pkg['Package'], pkg.get('Build-Depends'))
src_deps[p] = []
src2bin[p] = [b.strip() for b in pkg['Binary'].split(',')]
# process all debian source packages
for d in debian_names:
# find gentoo package name
g = deb2gen.get(d)
if not g:
missing_deb_mappings.append(d) missing_deb_mappings.append(d)
continue
# get the droppable dependencies
deps = debian_deps.get(g)
if not deps:
continue
bindeps = list()
# get the debian build dependencies
pdeps = src_deps[d]
# for each droppable dependency
for r in deps:
# convert source package to binary packages
# only retain those binary packages that are a build dependency
bindeps.extend([p for p in src2bin[r] if p in pdeps])
if bindeps:
print d, bindeps
print list(set(missing_deb_mappings)) print list(set(missing_deb_mappings))
print list(set(missing_gen_mappings)) print list(set(missing_gen_mappings))

File diff suppressed because it is too large Load diff

View file

@ -1,96 +0,0 @@
#!/usr/bin/env python
import sys
import os
from os import path
from subprocess import Popen, PIPE
from functools import cmp_to_key
from datetime import datetime
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
sys.path.insert(0, pym_path)
from portage.versions import pkgcmp, pkgsplit
from portage.dep import use_reduce, Atom
from portage.exception import InvalidAtom
def format_timedelta(delta):
    """Format a timedelta as "HH:MM"; leftover seconds are dropped.

    Hours are not wrapped at 24, so a delta of one day and one minute is
    rendered as "24:01".
    """
    total_secs = delta.days * 60 * 60 * 24 + delta.seconds
    # explicit floor division keeps the arithmetic integral regardless of
    # how the interpreter treats "/"
    hours, minutes = divmod(total_secs // 60, 60)
    return "%02d:%02d" % (hours, minutes)
def estimate_remaining_time(before, count, i):
    """Return the estimated time left as an "HH:MM" string.

    before -- datetime at which processing started
    count  -- overall number of items
    i      -- number of items processed so far

    Returns "n.a." while no estimate is possible, i.e. when less than one
    second has elapsed or no item has been processed yet.
    """
    now = datetime.now()
    delta = now - before
    # delta in seconds
    delta = delta.days * 60 * 60 * 24 + delta.seconds
    # i == 0 means a speed of zero, which would divide by zero below
    if delta == 0 or i == 0:
        return "n.a."
    speed = float(i) / delta
    remaining = (count - i) / speed
    remaining_hours = remaining / 3600
    remaining_minutes = (remaining % 3600) / 60
    return "%02d:%02d" % (remaining_hours, remaining_minutes)
portdir = "./portdir"

# Collect the newest ebuild of every <portdir>/<category>/<package>/
# directory that contains *.ebuild files.
pkgnames = list()
for cat in os.listdir(portdir):
    catpath = os.path.join(portdir, cat)
    if not os.path.isdir(catpath):
        continue
    for pkg in os.listdir(catpath):
        pkgpath = os.path.join(catpath, pkg)
        if not os.path.isdir(pkgpath):
            continue
        # ver[:-7] cuts off the ".ebuild" extension
        pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
        if not pkgs:
            continue
        # only grab newest package
        pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
        # an "r0" revision is left out of the name
        if rev == "r0":
            pkgname = "%s/%s-%s"%(cat, pkg, ver)
        else:
            pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
        pkgnames.append(pkgname)
        sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
sys.stderr.write("\rGenerating list of packages... Done.\n")

# For every package, write the dependencies that only show up with USE flags
# enabled to ./out, one "<pkgname> <dep> <dep> ..." line per package.
count = 0
done = 0
total = len(pkgnames)
before = datetime.now()
# the with-statement closes (and thereby flushes) the output file
with open("./out", "wb") as deplist:
    # done is 1-based: the number of packages processed so far
    for done, pkgname in enumerate(pkgnames, 1):
        p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
        r = p.communicate()
        if p.returncode != 0:
            sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
        depend = r[0]
        all_use = use_reduce(depend, matchall=True)
        no_use = use_reduce(depend, matchnone=True)
        # cannot use sets because of possible sublists which are not hashable
        l = []
        for a in all_use:
            # FIXME: do not discard disjunctions
            if a in no_use or a == "||" or isinstance(a, list):
                continue
            try:
                l.append(Atom(a).cp)
            except InvalidAtom:
                sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(a)))
            except TypeError:
                sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(a)))
        if l:
            deplist.write("%s %s\n"%(pkgname, " ".join(l)))
            count += 1
        sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(done, total, count, estimate_remaining_time(before, total, done)))
        # the progress line goes to stderr, so that is the stream to flush
        sys.stderr.flush()

# no estimate can be computed when nothing was processed
if done:
    time_left = estimate_remaining_time(before, total, done)
else:
    time_left = "n.a."
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(done, total, count, time_left))
sys.stderr.write("Done. Took %s h\n"%format_timedelta(datetime.now()-before))

View file

@ -1,5 +0,0 @@
#!/bin/bash
# Write every <category>/<package> directory below ./portdir/ to
# gen_source_pkgs.list, one per line, without the "./portdir/" prefix.
# Reading line by line and quoting the expansion avoids the word splitting
# and glob expansion of the former unquoted for-loop.
find "./portdir/" -mindepth 2 -maxdepth 2 -type d | while IFS= read -r f; do
	printf '%s\n' "${f#./portdir/}"
done > gen_source_pkgs.list

File diff suppressed because it is too large Load diff