gen2deb/find_reduced.py
2012-10-13 19:22:49 +02:00

214 lines
6.4 KiB
Python
Executable file

#!/usr/bin/env python
import sys
# The script takes exactly one command line argument (the Sources file that
# is parsed further below).
if len(sys.argv) != 2:
    # A usage error is a failure: report it on stderr and leave with a
    # non-zero status so that calling scripts can detect the misuse.
    # sys.exit is used because the bare exit() helper is only injected by
    # the site module and is not guaranteed to exist.
    sys.stderr.write("usage: %s /path/to/Sources.bz\n" % sys.argv[0])
    sys.exit(1)
import os
from os import path
from collections import Counter, defaultdict
from subprocess import Popen, PIPE
from functools import cmp_to_key
from datetime import datetime
import apt_pkg
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
sys.path.insert(0, pym_path)
from portage.versions import pkgcmp, pkgsplit
from portage.dep import use_reduce, Atom
from portage.exception import InvalidAtom
def estimate_remaining_time(before, count, i):
    """Estimate how long the remaining work is going to take.

    before -- datetime at which the processing started
    count  -- overall number of items to process
    i      -- current progress (number of items accounted for so far)

    Returns the remaining time formatted as "HH:MM:SS", or "n.a." while no
    estimate is possible (less than one whole second elapsed or i is zero).
    """
    elapsed = datetime.now() - before
    # whole seconds only; the microsecond part of the timedelta is ignored
    elapsed_seconds = elapsed.days * 24 * 60 * 60 + elapsed.seconds
    if i == 0 or elapsed_seconds == 0:
        return "n.a."
    items_per_second = float(i) / elapsed_seconds
    seconds_left = (count - i) / items_per_second
    parts = (
        seconds_left / 3600,
        (seconds_left % 3600) / 60,
        seconds_left % 60,
    )
    # %d truncates the float values to whole hours, minutes and seconds
    return "%02d:%02d:%02d" % parts
def flatten(l):
    """Lazily yield the leaves of an arbitrarily nested list, depth first.

    Only ``list`` instances are descended into; every other element (tuples
    included) is yielded unchanged. Empty nested lists contribute nothing.

    The argument is left untouched: the nesting is walked with a stack of
    iterators instead of splicing sub-lists into the caller's list and
    popping from its front, which emptied the input and cost quadratic time.
    """
    # a falsy argument (empty list, None) yields nothing
    if not l:
        return
    # Explicit stack instead of recursion, so the nesting depth is not
    # limited by the interpreter's recursion limit.
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # descend; the outer iterator resumes after this sub-list
                pending.append(iter(item))
                break
            yield item
        else:
            # innermost iterator is exhausted, go back up one level
            pending.pop()
# Load the mapping between Debian and Gentoo package names. Every line of
# the mapping file is split at a tab into a Debian name and a Gentoo name;
# both lookup directions are kept and a name may have several counterparts.
deb2gen = defaultdict(list)
gen2deb = defaultdict(list)
with open("./deb2gen_mapping.list") as mapping_file:
    for entry in mapping_file:
        debian_name, gentoo_name = entry.strip().split('\t')
        gen2deb[gentoo_name].append(debian_name)
        deb2gen[debian_name].append(gentoo_name)

# Names of the Debian source packages to look at, one per line. The first
# four characters of each line are dropped (presumably a "src:" prefix --
# TODO confirm against the generator of this file).
with open("./deb_source_pkgs.list") as names_file:
    debian_names = [entry[4:].strip() for entry in names_file]
portdir = "./portdir"
pkgnames = list()
# The relevance check below runs for every package directory; a set gives
# constant-time membership tests instead of scanning the debian_names list.
relevant_debian_names = set(debian_names)
# traverse ./portdir (<category>/<package>/*.ebuild) to find the .ebuild
# files that interest us
for cat in os.listdir(portdir):
    catpath = os.path.join(portdir, cat)
    if not os.path.isdir(catpath):
        continue
    for pkg in os.listdir(catpath):
        pkgpath = os.path.join(catpath, pkg)
        if not os.path.isdir(pkgpath):
            continue
        # get all .ebuild files, with the ".ebuild" suffix cut off, split
        # into (name, version, revision)
        candidates = (
            pkgsplit(ebuild[:-len(".ebuild")])
            for ebuild in os.listdir(pkgpath)
            if ebuild.endswith(".ebuild")
        )
        # pkgsplit() is expected to return None for a name it cannot parse
        # (see portage.versions); such entries would break both the sort and
        # the tuple unpacking below, so they are skipped.
        pkgs = [parts for parts in candidates if parts is not None]
        if not pkgs:
            continue
        # only grab newest package
        pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
        mapped = gen2deb.get("%s/%s" % (cat, pkg), [])
        # assure that the package is relevant: at least one of the Debian
        # packages it maps to must be in the list of interesting packages
        if not any(name in relevant_debian_names for name in mapped):
            continue
        pkgnames.append((cat, pkg, ver, rev))
        sys.stderr.write("\rGenerating list of packages... %d" % len(pkgnames))
sys.stderr.write("\rGenerating list of packages... Done.\n")
count = 0
before = datetime.now()
debian_deps = dict()
missing_gen_mappings = list()
total = len(pkgnames)
progress = "\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"
# look for reduced build dependencies of gentoo packages by extracting all
# dependencies that need a USE flag to be set
for done, (cat, pkg, ver, rev) in enumerate(pkgnames, 1):
    # a revision of "r0" is left out of the package name
    if rev == "r0":
        pkgname = "%s/%s-%s" % (cat, pkg, ver)
    else:
        pkgname = "%s/%s-%s-%s" % (cat, pkg, ver, rev)
    # env= replaces the environment of the child process, so PORTDIR is the
    # only variable portageq gets to see
    query = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
    depend, errors = query.communicate()
    if query.returncode != 0:
        # best effort: report the failure and carry on with whatever was
        # written to stdout
        sys.stderr.write("cannot parse %s. Output: %s\n" % (pkgname, errors))
    # dependencies with all USE conditionals matched / with none matched
    all_use = use_reduce(depend, matchall=True)
    no_use = use_reduce(depend, matchnone=True)
    gentoo_deps = list()
    for a in all_use:
        # filter out all dependencies that are also present when USE flags are disabled
        if a in no_use:
            continue
        if isinstance(a, list):
            # there are disjunctions but we better collect too many build dependencies than too few
            atoms = flatten(a)
        else:
            atoms = [a]
        for atom in atoms:
            # skip the disjunction operator itself
            if atom == "||":
                continue
            try:
                gentoo_deps.append(Atom(atom).cp)
            except InvalidAtom:
                sys.stderr.write("Invalid Atom for %s: %s\n" % (pkgname, str(atom)))
            except TypeError:
                sys.stderr.write("TypeError for %s: %s\n" % (pkgname, str(atom)))
    if gentoo_deps:
        # translate the Gentoo dependencies to Debian names, remembering
        # those for which no mapping exists
        ddeps = []
        for dep in gentoo_deps:
            mapped = gen2deb.get(dep)
            if not mapped:
                missing_gen_mappings.append(dep)
                continue
            ddeps.extend(mapped)
        debian_deps["%s/%s" % (cat, pkg)] = list(set(ddeps))
        count += 1
    # the estimate is based on the number of packages actually finished
    # (done), the same number that is displayed
    sys.stderr.write(progress % (done, total, count, estimate_remaining_time(before, total, done)))
    # the progress line goes to stderr, so that is the stream to flush
    sys.stderr.flush()
# final status line; does not depend on a loop variable, so it also works
# when pkgnames is empty
sys.stderr.write((progress + "\n") % (total, total, count, estimate_remaining_time(before, total, total)))
missing_deb_mappings = list()
src2bin = dict()
src_deps = dict()
# get the actual Debian build dependencies and the binary package names of
# every source package listed in the Sources file given on the command line
for section in apt_pkg.TagFile(sys.argv[1]):
    source = section['Package']
    try:
        # flatten() turns the nested result of parse_src_depends into a flat
        # sequence of (name, version, relation) tuples; only the name is kept
        parsed = apt_pkg.parse_src_depends(section.get('Build-Depends', ''))
        src_deps[source] = [name for (name, _ver, _rel) in flatten(parsed)]
    except ValueError:
        # diagnostics belong on stderr like all others in this script;
        # stdout carries the "src:..." result lines
        sys.stderr.write("cannot parse depends line for %s: %s\n" % (source, section.get('Build-Depends')))
        src_deps[source] = []
    src2bin[source] = [b.strip() for b in section['Binary'].split(',')]
all_droppable = list()
# Walk over all Debian source packages and print those of their build
# dependencies that are built from a source package which the corresponding
# Gentoo package does not need.
for d in debian_names:
    # Gentoo package names mapped to this Debian source package
    gentoo_pkgs = deb2gen.get(d)
    if not gentoo_pkgs:
        missing_deb_mappings.append(d)
        continue
    # droppable dependencies (Debian source package names), gathered over
    # all mapped Gentoo packages
    deps = []
    for gentoo_pkg in gentoo_pkgs:
        deps.extend(debian_deps.get(gentoo_pkg, []))
    if not deps:
        continue
    # the Debian build dependencies of this source package
    # NOTE(review): raises KeyError when d is missing from the Sources file
    pdeps = src_deps[d]
    bindeps = list()
    for src in deps:
        # convert the source package to its binary packages and only retain
        # those binary packages that are a build dependency
        for binary in src2bin.get(src, []):
            if binary in pdeps:
                bindeps.append(binary)
    if not bindeps:
        continue
    print("src:%s %s" % (d, " ".join(bindeps)))
    all_droppable.extend(bindeps)

# summary on stderr: names without a mapping, then the 20 most frequently
# droppable binary packages
unmapped_debian = list(set(missing_deb_mappings))
unmapped_gentoo = list(set(missing_gen_mappings))
sys.stderr.write("%s\n%s\n" % (unmapped_debian, unmapped_gentoo))
drop_freq = Counter(all_droppable)
for package, freq in drop_freq.most_common(20):
    sys.stderr.write("%s: %s\n" % (package, freq))